00001 #pragma GCC diagnostic push
00002 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
00003 #include <xmlrpc-c/client_simple.hpp>
00004 #pragma GCC diagnostic pop
00005
00006 #include <errno.h>
00007 #include <sstream>
00008 #include <iomanip>
00009 #include <bitset>
00010
00011 #include <boost/tokenizer.hpp>
00012 #include <boost/filesystem.hpp>
00013 #include <boost/algorithm/string.hpp>
00014 #include "art/Framework/Art/artapp.h"
00015 #include "cetlib/BasicPluginFactory.h"
00016
00017 #include "artdaq-core/Core/SimpleQueueReader.hh"
00018 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00019 #include "artdaq-core/Data/RawEvent.hh"
00020
00021 #include "artdaq/Application/AggregatorCore.hh"
00022 #include "artdaq/DAQrate/EventStore.hh"
00023 #include "artdaq/DAQrate/detail/FragCounter.hh"
00024 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
00025
00026
00027 namespace BFS = boost::filesystem;
00028
00029 const std::string artdaq::AggregatorCore::INPUT_EVENTS_STAT_KEY("AggregatorCoreInputEvents");
00030 const std::string artdaq::AggregatorCore::INPUT_WAIT_STAT_KEY("AggregatorCoreInputWaitTime");
00031 const std::string artdaq::AggregatorCore::STORE_EVENT_WAIT_STAT_KEY("AggregatorCoreStoreEventWaitTime");
00032 const std::string artdaq::AggregatorCore::SHM_COPY_TIME_STAT_KEY("AggregatorCoreShmCopyTime");
00033 const std::string artdaq::AggregatorCore::FILE_CHECK_TIME_STAT_KEY("AggregatorCoreFileCheckTime");
00034
00035 namespace artdaq
00036 {
00043 void display_bits(void* memstart, size_t nbytes, std::string sourcename)
00044 {
00045 std::stringstream bitstr;
00046 bitstr << "The " << nbytes << "-byte chunk of memory beginning at " << static_cast<void*>(memstart) << " is : ";
00047
00048 for (unsigned int i = 0; i < nbytes; i++)
00049 {
00050 if (i % 4 == 0)
00051 {
00052 bitstr << "\n";
00053 }
00054
00055 bitstr << std::bitset<8>(*((reinterpret_cast<uint8_t*>(memstart)) + i)) << " ";
00056 }
00057
00058 TLOG_DEBUG(sourcename) << bitstr.str() << TLOG_ENDL;
00059 }
00060 }
00061
00062
00063 artdaq::AggregatorCore::AggregatorCore(int rank, std::string name)
00064 : name_(name)
00065 , art_initialized_(false)
00066 , event_queue_(artdaq::getGlobalQueue(10))
00067 , stop_requested_(false)
00068 , local_pause_requested_(false)
00069 , processing_fragments_(false)
00070 , system_pause_requested_(false)
00071 , previous_run_duration_(-1.0)
00072 , new_transfers_(0)
00073 {
00074 TLOG_DEBUG(name_) << "Constructor" << TLOG_ENDL;
00075 stats_helper_.addMonitoredQuantityName(INPUT_EVENTS_STAT_KEY);
00076 stats_helper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00077 stats_helper_.addMonitoredQuantityName(STORE_EVENT_WAIT_STAT_KEY);
00078 stats_helper_.addMonitoredQuantityName(SHM_COPY_TIME_STAT_KEY);
00079 stats_helper_.addMonitoredQuantityName(FILE_CHECK_TIME_STAT_KEY);
00080 metricMan = &metricMan_;
00081 my_rank = rank;
00082 }
00083
00084 artdaq::AggregatorCore::~AggregatorCore()
00085 {
00086 TLOG_DEBUG(name_) << "Destructor" << TLOG_ENDL;
00087 }
00088
00089 bool artdaq::AggregatorCore::initialize(fhicl::ParameterSet const& pset)
00090 {
00091 init_string_ = pset.to_string();
00092 TLOG_DEBUG(name_) << "initialize method called with DAQ " << "ParameterSet = \"" << init_string_ << "\"." << TLOG_ENDL;
00093
00094
00095 fhicl::ParameterSet daq_pset;
00096 try
00097 {
00098 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00099 }
00100 catch (...)
00101 {
00102 TLOG_ERROR(name_)
00103 << "Unable to find the DAQ parameters in the initialization "
00104 << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
00105 return false;
00106 }
00107 fhicl::ParameterSet agg_pset;
00108 try
00109 {
00110 agg_pset = daq_pset.get<fhicl::ParameterSet>("aggregator");
00111 data_pset_ = agg_pset;
00112 }
00113 catch (...)
00114 {
00115 TLOG_ERROR(name_)
00116 << "Unable to find the aggregator parameters in the DAQ "
00117 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
00118 return false;
00119 }
00120 try
00121 {
00122 expected_events_per_bunch_ =
00123 agg_pset.get<size_t>("expected_events_per_bunch");
00124 }
00125 catch (...)
00126 {
00127 TLOG_ERROR(name_)
00128 << "The expected_events_per_bunch parameter was not specified "
00129 << "in the aggregator initialization PSet: \"" << pset.to_string()
00130 << "\"." << TLOG_ENDL;
00131 return false;
00132 }
00133
00134 enq_timeout_ = static_cast<detail::seconds>(agg_pset.get<size_t>("enq_timeout", 5.0));
00135
00136
00137
00138
00139
00140 is_data_logger_ = false;
00141 is_online_monitor_ = false;
00142 is_dispatcher_ = false;
00143 std::string metricsReportingInstanceName = "Data Logger";
00144 bool agtype_was_specified = false;
00145 if (!agtype_was_specified)
00146 {
00147 try
00148 {
00149 is_data_logger_ = agg_pset.get<bool>("is_data_logger");
00150 agtype_was_specified = true;
00151 }
00152 catch (...) {}
00153 }
00154 if (!agtype_was_specified)
00155 {
00156 try
00157 {
00158 is_online_monitor_ = agg_pset.get<bool>("is_online_monitor");
00159 metricsReportingInstanceName = "Online Monitor";
00160 agtype_was_specified = true;
00161 }
00162 catch (...) {}
00163 }
00164 if (!agtype_was_specified)
00165 {
00166 try
00167 {
00168 is_dispatcher_ = agg_pset.get<bool>("is_dispatcher");
00169 metricsReportingInstanceName = "Dispatcher";
00170 agtype_was_specified = true;
00171 }
00172 catch (...) {}
00173 }
00174
00175 if (!agtype_was_specified)
00176 {
00177 throw cet::exception("ConfigurationException", "You must specify one of is_data_logger, is_online_monitor or is_dispatcher");
00178 return false;
00179 }
00180 TLOG_DEBUG(name_) << "Rank " << my_rank
00181 << ", is_data_logger = " << is_data_logger_
00182 << ", is_online_monitor = " << is_online_monitor_
00183 << ", is_dispatcher = " << is_dispatcher_ << TLOG_ENDL;
00184
00185 disk_writing_directory_ = "";
00186 try
00187 {
00188 fhicl::ParameterSet output_pset =
00189 pset.get<fhicl::ParameterSet>("outputs");
00190 fhicl::ParameterSet normalout_pset =
00191 output_pset.get<fhicl::ParameterSet>("normalOutput");
00192
00193 if (!normalout_pset.is_empty())
00194 {
00195 std::string filename = normalout_pset.get<std::string>("fileName", "");
00196 if (filename.size() > 0)
00197 {
00198 size_t pos = filename.rfind("/");
00199 if (pos != std::string::npos)
00200 {
00201 disk_writing_directory_ = filename.substr(0, pos);
00202 }
00203 }
00204 else
00205 {
00206 TLOG_WARNING(name_) << "Problem finding \"fileName\" parameter in \"normalOutput\" RootOutput module FHiCL code" << TLOG_ENDL;
00207 }
00208 }
00209 }
00210 catch (...) {}
00211
00212 std::string xmlrpcClientString =
00213 agg_pset.get<std::string>("xmlrpc_client_list", "");
00214 if (xmlrpcClientString.size() > 0)
00215 {
00216 xmlrpc_client_lists_.clear();
00217 boost::char_separator<char> sep1(";");
00218 boost::tokenizer<boost::char_separator<char>>
00219 primaryTokens(xmlrpcClientString, sep1);
00220 boost::tokenizer<boost::char_separator<char>>::iterator iter1;
00221 boost::tokenizer<boost::char_separator<char>>::iterator
00222 endIter1 = primaryTokens.end();
00223 for (iter1 = primaryTokens.begin(); iter1 != endIter1; ++iter1)
00224 {
00225 boost::char_separator<char> sep2(",");
00226 boost::tokenizer<boost::char_separator<char>>
00227 secondaryTokens(*iter1, sep2);
00228 boost::tokenizer<boost::char_separator<char>>::iterator iter2;
00229 boost::tokenizer<boost::char_separator<char>>::iterator
00230 endIter2 = secondaryTokens.end();
00231 int clientGroup = -1;
00232 std::string url = "";
00233 int loopCount = 0;
00234 for (iter2 = secondaryTokens.begin(); iter2 != endIter2; ++iter2)
00235 {
00236 switch (loopCount)
00237 {
00238 case 0:
00239 url = *iter2;
00240 break;
00241 case 1:
00242 try
00243 {
00244 clientGroup = boost::lexical_cast<int>(*iter2);
00245 }
00246 catch (...) {}
00247 break;
00248 default:
00249 TLOG_WARNING(name_)
00250 << "Unexpected XMLRPC client list element, index = "
00251 << loopCount << ", value = \"" << *iter2 << "\"" << TLOG_ENDL;
00252 }
00253 ++loopCount;
00254 }
00255 if (clientGroup >= 0 && url.size() > 0)
00256 {
00257 int elementsNeeded = clientGroup + 1 - ((int)xmlrpc_client_lists_.size());
00258 for (int idx = 0; idx < elementsNeeded; ++idx)
00259 {
00260 std::vector<std::string> tmpVec;
00261 xmlrpc_client_lists_.push_back(tmpVec);
00262 }
00263 xmlrpc_client_lists_[clientGroup].push_back(url);
00264 }
00265 }
00266 }
00267 double fileSizeMB = agg_pset.get<double>("subrun_size_MB", 0);
00268 file_close_threshold_bytes_ = ((size_t)fileSizeMB * 1024.0 * 1024.0);
00269 file_close_timeout_secs_ = agg_pset.get<time_t>("subrun_duration", 0);
00270 file_close_event_count_ = agg_pset.get<size_t>("subrun_event_count", 0);
00271
00272 inrun_recv_timeout_usec_ = agg_pset.get<size_t>("inrun_recv_timeout_usec", 100000);
00273 endrun_recv_timeout_usec_ = agg_pset.get<size_t>("endrun_recv_timeout_usec", 20000000);
00274 pause_recv_timeout_usec_ = agg_pset.get<size_t>("pause_recv_timeout_usec", 3000000);
00275
00276 onmon_event_prescale_ = agg_pset.get<size_t>("onmon_event_prescale", 1);
00277
00278 filesize_check_interval_seconds_ = agg_pset.get<int32_t>("filesize_check_interval_seconds", 20);
00279 filesize_check_interval_events_ = agg_pset.get<int32_t>("filesize_check_interval_events", 20);
00280
00281
00282 stats_helper_.createCollectors(agg_pset, 50, 20.0, 60.0, INPUT_EVENTS_STAT_KEY);
00283
00284
00285 fhicl::ParameterSet metric_pset;
00286
00287 try
00288 {
00289 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00290 }
00291 catch (...) {}
00292
00293 if (metric_pset.is_empty())
00294 {
00295 TLOG_INFO(name_) << "No metric plugins appear to be defined" << TLOG_ENDL;
00296 }
00297 try
00298 {
00299 metricMan_.initialize(metric_pset, metricsReportingInstanceName);
00300 }
00301 catch (...)
00302 {
00303 ExceptionHandler(ExceptionHandlerRethrow::no,
00304 "Error loading metrics in AggregatorCore::initialize()");
00305 }
00306
00307 if (event_store_ptr_ == nullptr)
00308 {
00309 artdaq::EventStore::ART_CFGSTRING_FCN* reader = &artapp_string_config;
00310 size_t desired_events_per_bunch = expected_events_per_bunch_;
00311 if (is_online_monitor_ || is_dispatcher_)
00312 {
00313 desired_events_per_bunch = 1;
00314 }
00315 TRACE(36, "Creating EventStore and Starting art thread");
00316 event_store_ptr_.reset(new artdaq::EventStore(agg_pset, desired_events_per_bunch, 1,
00317 init_string_, reader));
00318 TRACE(36, "Done Creating EventStore");
00319 event_store_ptr_->setSeqIDModulus(desired_events_per_bunch);
00320 fhicl::ParameterSet tmp = pset;
00321 tmp.erase("daq");
00322 previous_pset_ = tmp;
00323 }
00324 else
00325 {
00326 fhicl::ParameterSet tmp = pset;
00327 tmp.erase("daq");
00328 if (tmp != previous_pset_)
00329 {
00330 TLOG_ERROR(name_)
00331 << "The art configuration can not be altered after art "
00332 << "has been configured." << TLOG_ENDL;
00333 return false;
00334 }
00335 }
00336
00337 return true;
00338 }
00339
00340 bool artdaq::AggregatorCore::start(art::RunID id)
00341 {
00342 event_count_in_run_ = 0;
00343 event_count_in_subrun_ = 0;
00344 subrun_start_time_ = time(0);
00345 stats_helper_.resetStatistics();
00346 previous_run_duration_ = -1.0;
00347
00348 stop_requested_.store(false);
00349 local_pause_requested_.store(false);
00350 run_id_ = id;
00351 metricMan_.do_start();
00352 event_store_ptr_->startRun(run_id_.run());
00353
00354 logMessage_("Started run " + boost::lexical_cast<std::string>(run_id_.run()));
00355 return true;
00356 }
00357
00358 bool artdaq::AggregatorCore::stop()
00359 {
00360 logMessage_("Stopping run " + boost::lexical_cast<std::string>(run_id_.run()) +
00361 ", " + boost::lexical_cast<std::string>(event_count_in_run_) +
00362 " events received so far.");
00363
00364
00365
00366
00367 stop_requested_.store(true);
00368 return true;
00369 }
00370
00371 bool artdaq::AggregatorCore::pause()
00372 {
00373 logMessage_("Pausing run " + boost::lexical_cast<std::string>(run_id_.run()) +
00374 ", " + boost::lexical_cast<std::string>(event_count_in_run_) +
00375 " events received so far.");
00376
00377
00378
00379
00380 local_pause_requested_.store(true);
00381 return true;
00382 }
00383
00384 bool artdaq::AggregatorCore::resume()
00385 {
00386 event_count_in_subrun_ = 0;
00387 subrun_start_time_ = time(0);
00388 local_pause_requested_.store(false);
00389
00390 logMessage_("Resuming run " + boost::lexical_cast<std::string>(run_id_.run()));
00391 metricMan_.do_start();
00392 event_store_ptr_->startSubrun();
00393 return true;
00394 }
00395
00396 bool artdaq::AggregatorCore::shutdown()
00397 {
00398 int readerReturnValue;
00399 bool endSucceeded = false;
00400 int attemptsToEnd = 1;
00401 endSucceeded = event_store_ptr_->endOfData(readerReturnValue);
00402 while (!endSucceeded && attemptsToEnd < 3)
00403 {
00404 ++attemptsToEnd;
00405 TLOG_DEBUG(name_) << "Retrying EventStore::endOfData()" << TLOG_ENDL;
00406 endSucceeded = event_store_ptr_->endOfData(readerReturnValue);
00407 }
00408 metricMan_.shutdown();
00409
00410 return endSucceeded;
00411 }
00412
00413 bool artdaq::AggregatorCore::soft_initialize(fhicl::ParameterSet const& pset)
00414 {
00415 TLOG_DEBUG(name_) << "soft_initialize method called with DAQ "
00416 << "ParameterSet = \"" << pset.to_string()
00417 << "\"." << TLOG_ENDL;
00418 return true;
00419 }
00420
00421 bool artdaq::AggregatorCore::reinitialize(fhicl::ParameterSet const& pset)
00422 {
00423 TLOG_DEBUG(name_) << "reinitialize method called with DAQ "
00424 << "ParameterSet = \"" << pset.to_string()
00425 << "\"." << TLOG_ENDL;
00426 return true;
00427 }
00428
00429 size_t artdaq::AggregatorCore::process_fragments()
00430 {
00431 processing_fragments_.store(true);
00432
00433 size_t eodFragmentsReceived = 0;
00434 bool process_fragments = true;
00435 int senderSlot;
00436 detail::FragCounter fragments_received;
00437 detail::FragCounter fragments_sent;
00438 artdaq::FragmentPtr endSubRunMsg(nullptr);
00439 time_t last_filesize_check_time = subrun_start_time_;
00440
00441
00442 if (true)
00443 {
00444 receiver_ptr_.reset(new artdaq::DataReceiverManager(data_pset_));
00445 receiver_ptr_->start_threads();
00446 }
00447
00448 if (is_data_logger_ && data_pset_.has_key("destinations"))
00449 {
00450 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00451
00452 if (sender_ptr_->destinationCount() == 0)
00453 {
00454 sender_ptr_.reset(nullptr);
00455 }
00456 }
00457
00458 TLOG_DEBUG(name_) << "Waiting for first fragment." << TLOG_ENDL;
00459
00460 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00461 while (process_fragments)
00462 {
00463 artdaq::FragmentPtr fragmentPtr(new artdaq::Fragment);
00464
00465 size_t recvTimeout = inrun_recv_timeout_usec_;
00466 if (stop_requested_.load()) { recvTimeout = endrun_recv_timeout_usec_; }
00467 else if (local_pause_requested_.load()) { recvTimeout = pause_recv_timeout_usec_; }
00468
00469 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00470
00471
00472 fragmentPtr = receiver_ptr_->recvFragment(senderSlot, recvTimeout);
00473
00474 stats_helper_.addSample(INPUT_WAIT_STAT_KEY,
00475 (artdaq::MonitoredQuantity::getCurrentTime() - startTime));
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503 if (senderSlot == artdaq::TransferInterface::RECV_TIMEOUT)
00504 {
00505 if (stop_requested_.load() &&
00506 recvTimeout == endrun_recv_timeout_usec_)
00507 {
00508 if (endSubRunMsg != nullptr)
00509 {
00510 TLOG_WARNING(name_)
00511 << "Timeout occurred in attempt to receive data, but as a stop has been requested, will forcibly end the run." << TLOG_ENDL;
00512 event_store_ptr_->flushData();
00513 artdaq::RawEvent_ptr subRunEvent(new artdaq::RawEvent(run_id_.run(), 1, 0));
00514 subRunEvent->insertFragment(std::move(endSubRunMsg));
00515
00516 bool enqStatus = event_queue_.enqTimedWait(subRunEvent, enq_timeout_);
00517 if (!enqStatus)
00518 {
00519 TLOG_ERROR(name_) << "Attempt to send EndOfSubRun fragment to art timed out after " <<
00520 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00521 }
00522 }
00523 else
00524 {
00525 if (event_count_in_subrun_ > 0)
00526 {
00527 TLOG_ERROR(name_)
00528 << "Timeout receiving data after stop request, and the EndOfSubRun fragment isn't available to send to art; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00529 }
00530 else
00531 {
00532 std::string msg("Timeout receiving data after stop request, and the EndOfSubRun fragment isn't available to send to art;");
00533 msg.append("DAQ may need to be returned to the \"Stopped\" state before further datataking");
00534 logMessage_(msg);
00535 }
00536 }
00537 process_fragments = false;
00538 }
00539 else if (local_pause_requested_.load() &&
00540 recvTimeout == pause_recv_timeout_usec_)
00541 {
00542 if (endSubRunMsg != nullptr)
00543 {
00544 TLOG_WARNING(name_)
00545 << "Timeout occurred in attempt to receive data, but as a pause has been requested, will forcibly pause the run." << TLOG_ENDL;
00546 event_store_ptr_->flushData();
00547 artdaq::RawEvent_ptr subRunEvent(new artdaq::RawEvent(run_id_.run(), 1, 0));
00548 subRunEvent->insertFragment(std::move(endSubRunMsg));
00549
00550 bool enqStatus = event_queue_.enqTimedWait(subRunEvent, enq_timeout_);
00551 if (!enqStatus)
00552 {
00553 TLOG_ERROR(name_) << "Attempt to send EndOfSubRun fragment to art timed out after " <<
00554 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00555 }
00556 }
00557 else
00558 {
00559 TLOG_ERROR(name_) <<
00560 "Timeout receiving data after pause request, and the EndOfSubRun fragment isn't available to send to art; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00561 }
00562 process_fragments = false;
00563 }
00564
00565 continue;
00566 }
00567 else if (!fragmentPtr)
00568 {
00569 TLOG_ERROR(name_) << "Received invalid fragment from " << senderSlot << ". This is usually the case when a timeout has occurred, but sender was not set to RECV_TIMEOUT as expected." << TLOG_ENDL;
00570 continue;
00571 }
00572 if (!receiver_ptr_->enabled_sources().count(senderSlot))
00573 {
00574 TLOG_ERROR(name_)
00575 << "Invalid senderSlot received from recvFragment: "
00576 << senderSlot << TLOG_ENDL;
00577 continue;
00578 }
00579 fragments_received.incSlot(senderSlot);
00580 if (artdaq::Fragment::isSystemFragmentType(fragmentPtr->type()) &&
00581 fragmentPtr->type() != artdaq::Fragment::DataFragmentType)
00582 {
00583 TLOG_DEBUG(name_)
00584 << "Sender slot = " << senderSlot
00585 << ", fragment type = " << static_cast<int>(fragmentPtr->type())
00586 << ", sequence ID = " << fragmentPtr->sequenceID() << TLOG_ENDL;
00587 }
00588
00589
00590 if (fragmentPtr->type() == artdaq::Fragment::InvalidFragmentType)
00591 {
00592 size_t fragSize = fragmentPtr->size() * sizeof(artdaq::RawDataType);
00593 TLOG_ERROR(name_) << "Fragment received with type of "
00594 << "INVALID. Size = " << fragSize
00595 << ", sequence ID = " << fragmentPtr->sequenceID()
00596 << ", fragment ID = " << fragmentPtr->fragmentID()
00597 << ", and type = " << static_cast<int>(fragmentPtr->type()) << TLOG_ENDL;
00598 continue;
00599 }
00600
00601 if (artdaq::Fragment::isUserFragmentType(fragmentPtr->type()) ||
00602 fragmentPtr->type() == artdaq::Fragment::DataFragmentType)
00603 {
00604 ++event_count_in_run_;
00605 ++event_count_in_subrun_;
00606 if (event_count_in_run_ == 1)
00607 {
00608 logMessage_("Received event " +
00609 boost::lexical_cast<std::string>(event_count_in_run_) +
00610 " with sequence id " +
00611 boost::lexical_cast<std::string>(fragmentPtr->sequenceID()) +
00612 ".");
00613 }
00614 stats_helper_.addSample(INPUT_EVENTS_STAT_KEY, fragmentPtr->size());
00615 if (stats_helper_.readyToReport(event_count_in_run_))
00616 {
00617 std::string statString = buildStatisticsString_();
00618 logMessage_(statString);
00619 logMessage_("Received event " +
00620 boost::lexical_cast<std::string>(event_count_in_run_) +
00621 " with sequence id " +
00622 boost::lexical_cast<std::string>(fragmentPtr->sequenceID()) +
00623 " (run " +
00624 boost::lexical_cast<std::string>(run_id_.run()) +
00625 ", subrun " +
00626 boost::lexical_cast<std::string>(event_store_ptr_->subrunID()) +
00627 ").");
00628 }
00629 }
00630 if (stats_helper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00631
00632 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00633
00634 if (is_data_logger_ && fragmentPtr->type() == artdaq::Fragment::DataFragmentType
00635 && (event_count_in_run_ % onmon_event_prescale_) == 0 && sender_ptr_)
00636 {
00637 try
00638 {
00639 auto fragCopy = *fragmentPtr;
00640 sender_ptr_->sendFragment(std::move(fragCopy));
00641 }
00642 catch (...)
00643 {
00644 ExceptionHandler(ExceptionHandlerRethrow::no,
00645 "Exception thrown during data logger copy of event to dispatcher");
00646 }
00647 }
00648 else if (is_dispatcher_)
00649 {
00650 if (fragmentPtr->type() != artdaq::Fragment::EndOfDataFragmentType)
00651 {
00652 if (fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
00653 {
00654 init_fragment_ptr_ = std::make_unique<artdaq::Fragment>(*fragmentPtr);
00655 }
00656
00657 std::lock_guard<std::mutex> lock(dispatcher_transfers_mutex_);
00658
00659 if (new_transfers_ == 0)
00660 {
00661
00662 if (dispatcher_transfers_.size() > 0 && fragmentPtr->sequenceID() % 100 == 0)
00663 {
00664 TLOG_DEBUG(name_) << "Dispatcher: broadcasting seqID = " << fragmentPtr->sequenceID() << ", type = " <<
00665 static_cast<size_t>(fragmentPtr->type()) << " to " << dispatcher_transfers_.size()
00666 << " registered monitors" << TLOG_ENDL;
00667 }
00668 for (auto& transfer : dispatcher_transfers_)
00669 {
00670 transfer->copyFragment(*fragmentPtr, 0);
00671 }
00672 }
00673 else
00674 {
00675 for (size_t i_q = dispatcher_transfers_.size() - new_transfers_; i_q < dispatcher_transfers_.size(); ++i_q)
00676 {
00677 TLOG_INFO(name_) << "Copying out init fragment, type " << static_cast<int>(init_fragment_ptr_->type()) <<
00678 ", size " << init_fragment_ptr_->sizeBytes() << TLOG_ENDL;
00679 dispatcher_transfers_[i_q]->copyFragment(*init_fragment_ptr_, 500000);
00680 }
00681 new_transfers_ = 0;
00682 }
00683 }
00684 }
00685
00686 stats_helper_.addSample(SHM_COPY_TIME_STAT_KEY,
00687 (artdaq::MonitoredQuantity::getCurrentTime() - startTime));
00688
00689
00690
00691 artdaq::Fragment::sequence_id_t seq = fragmentPtr->sequenceID();
00692 TRACE(21, "%s::process_fragments seq=%lu isLogger=%d type=%d"
00693 , name_.c_str(), seq, is_data_logger_, fragmentPtr->type());
00694 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00695 if (!art_initialized_)
00696 {
00697
00698
00699 if (fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
00700 {
00701 TLOG_DEBUG(name_) << "Init" << TLOG_ENDL;
00702
00703 if (is_data_logger_ && sender_ptr_)
00704 {
00705 auto fragCopy = *fragmentPtr;
00706 sender_ptr_->sendFragment(std::move(fragCopy));
00707
00708 }
00709
00710 artdaq::RawEvent_ptr initEvent(new artdaq::RawEvent(run_id_.run(), 1, fragmentPtr->sequenceID()));
00711 initEvent->insertFragment(std::move(fragmentPtr));
00712
00713 bool enqStatus = event_queue_.enqTimedWait(initEvent, enq_timeout_);
00714
00715 if (!enqStatus)
00716 {
00717 TLOG_ERROR(name_) << "Attempt to send Init event to art timed out after " <<
00718 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00719 }
00720 art_initialized_ = true;
00721 }
00722 else
00723 {
00724 TLOG_ERROR(name_) << "Didn't receive an Init event with which to initialize art; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00725 }
00726 }
00727 else
00728 {
00729
00730
00731 if (fragmentPtr->type() == artdaq::Fragment::DataFragmentType)
00732 {
00733 if (is_data_logger_)
00734 {
00735 artdaq::FragmentPtr rejectedFragment;
00736 auto seqId = fragmentPtr->sequenceID();
00737 bool try_again = true;
00738 while (try_again)
00739 {
00740 auto ret = event_store_ptr_->insert(std::move(fragmentPtr), rejectedFragment);
00741 if (ret == EventStore::EventStoreInsertResult::SUCCESS)
00742 {
00743 receiver_ptr_->unsuppressAll();
00744 try_again = false;
00745 }
00746 else if (ret == EventStore::EventStoreInsertResult::SUCCESS_STOREFULL)
00747 {
00748 try_again = false;
00749 }
00750 else if (stop_requested_.load())
00751 {
00752 try_again = false;
00753 process_fragments = false;
00754 receiver_ptr_->reject_fragment(senderSlot, std::move(rejectedFragment));
00755 TLOG_WARNING(name_)
00756 << "Unable to process event " << seqId
00757 << " because of back-pressure - forcibly ending the run." << TLOG_ENDL;
00758 }
00759 else if (local_pause_requested_.load())
00760 {
00761 try_again = false;
00762 process_fragments = false;
00763 receiver_ptr_->reject_fragment(senderSlot, std::move(rejectedFragment));
00764 TLOG_WARNING(name_)
00765 << "Unable to process event " << seqId
00766 << " because of back-pressure - forcibly pausing the run." << TLOG_ENDL;
00767 }
00768 else if (ret == EventStore::EventStoreInsertResult::REJECT_QUEUEFULL)
00769 {
00770 fragmentPtr = std::move(rejectedFragment);
00771 TLOG_WARNING(name_)
00772 << "Unable to process event " << seqId
00773 << " because of back-pressure from art - retrying..." << TLOG_ENDL;
00774 }
00775 else
00776 {
00777 try_again = false;
00778 receiver_ptr_->reject_fragment(senderSlot, std::move(rejectedFragment));
00779 TLOG_WARNING(name_)
00780 << "Unable to process event " << seqId
00781 << " because the EventStore has reached the maximum number of incomplete bunches." << std::endl
00782 << " Will retry when the EventStore is ready for new events." << TLOG_ENDL;
00783 }
00784 }
00785 }
00786 else
00787 {
00788 event_store_ptr_->insert(std::move(fragmentPtr), false);
00789 }
00790 }
00791 else if (fragmentPtr->type() == artdaq::Fragment::EndOfSubrunFragmentType)
00792 {
00793 if (is_data_logger_ && sender_ptr_)
00794 {
00795 auto fragCopy = *fragmentPtr;
00796 sender_ptr_->sendFragment(std::move(fragCopy));
00797 }
00798 else if (is_dispatcher_)
00799 {
00800 for (auto& transfer : dispatcher_transfers_)
00801 {
00802 transfer->copyFragment(*fragmentPtr, 0);
00803 }
00804 }
00805
00806
00807
00808
00809
00810 endSubRunMsg = std::move(fragmentPtr);
00811 }
00812 else if (fragmentPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
00813 {
00814 eodFragmentsReceived++;
00815
00816
00817
00818 fragments_sent.setSlot(senderSlot, *fragmentPtr->dataBegin() + 1);
00819 }
00820 }
00821 float delta = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00822 stats_helper_.addSample(STORE_EVENT_WAIT_STAT_KEY, delta);
00823 TRACE((delta > 3.0) ? 0 : 22, "%s::process_fragments seq=%lu isLogger=%d delta=%f start=%f"
00824 , name_.c_str(), seq, is_data_logger_, delta, startTime);
00825
00826
00827 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00828 if (is_data_logger_ && disk_writing_directory_.size() > 0 &&
00829 !stop_requested_.load() && !system_pause_requested_.load())
00830 {
00831 bool threshold_reached = false;
00832 if (file_close_event_count_ > 0 &&
00833 event_count_in_subrun_ >= file_close_event_count_)
00834 {
00835 threshold_reached = true;
00836 }
00837 else
00838 {
00839 time_t now = time(0);
00840 if (file_close_timeout_secs_ > 0 &&
00841 (now - subrun_start_time_) >= file_close_timeout_secs_)
00842 {
00843 threshold_reached = true;
00844 }
00845 else
00846 {
00847 if (filesize_check_interval_seconds_ > 0 &&
00848 filesize_check_interval_events_ > 0 &&
00849 (now - last_filesize_check_time) >= filesize_check_interval_seconds_ &&
00850 (event_count_in_run_ % filesize_check_interval_events_) == 0)
00851 {
00852 if (file_close_threshold_bytes_ > 0 &&
00853 getLatestFileSize_() >= file_close_threshold_bytes_)
00854 {
00855 threshold_reached = true;
00856 }
00857 last_filesize_check_time = now;
00858 }
00859 }
00860 }
00861 if (threshold_reached)
00862 {
00863 system_pause_requested_.store(true);
00864 if (pause_thread_.get() != 0)
00865 {
00866 pause_thread_->join();
00867 }
00868 TLOG_DEBUG(name_) << "Starting sendPauseAndResume thread "
00869 << ", event count in subrun = "
00870 << event_count_in_subrun_ << TLOG_ENDL;
00871 pause_thread_.reset(new std::thread(&AggregatorCore::sendPauseAndResume_, this));
00872 }
00873 }
00874 stats_helper_.addSample(FILE_CHECK_TIME_STAT_KEY,
00875 (artdaq::MonitoredQuantity::getCurrentTime() - startTime));
00876
00877
00878
00879
00880
00881
00882 size_t source_count = 0;
00883 if (is_data_logger_) source_count = receiver_ptr_->enabled_sources().size();
00884 else source_count = 1;
00885
00886 if (eodFragmentsReceived >= source_count && endSubRunMsg != nullptr)
00887 {
00888 bool fragmentsOutstanding = false;
00889 if (is_data_logger_)
00890 {
00891 for (auto& i : receiver_ptr_->enabled_sources())
00892 {
00893 if (fragments_received[i] != fragments_sent[i])
00894 {
00895 fragmentsOutstanding = true;
00896 break;
00897 }
00898 }
00899 }
00900
00901 if (!fragmentsOutstanding)
00902 {
00903 event_store_ptr_->flushData();
00904 artdaq::RawEvent_ptr subRunEvent(new artdaq::RawEvent(run_id_.run(), 1, 0));
00905 subRunEvent->insertFragment(std::move(endSubRunMsg));
00906
00907 bool enqStatus = event_queue_.enqTimedWait(subRunEvent, enq_timeout_);
00908
00909 if (!enqStatus)
00910 {
00911 TLOG_ERROR(name_) << "All data appears to have been received but attempt to send EndOfSubRun fragment to art timed out after " <<
00912 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00913 }
00914 process_fragments = false;
00915 }
00916 else
00917 {
00918 TLOG_WARNING(name_) << "EndOfSubRun fragment and all EndOfData fragments received but more data expected" << TLOG_ENDL;
00919 }
00920 }
00921 }
00922
00923 logMessage_("Subrun " +
00924 boost::lexical_cast<std::string>(event_store_ptr_->subrunID()) +
00925 " in run " + boost::lexical_cast<std::string>(run_id_.run()) +
00926 " has ended. There were " +
00927 boost::lexical_cast<std::string>(event_count_in_subrun_) +
00928 " events in this subrun, and there have been " +
00929 boost::lexical_cast<std::string>(event_count_in_run_) +
00930 " events so far in this run.");
00931
00932 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00933 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
00934 if (mqPtr.get() != 0)
00935 {
00936 artdaq::MonitoredQuantityStats stats;
00937 mqPtr->getStats(stats);
00938 std::ostringstream oss;
00939 oss << "Run " << run_id_.run() << " has an overall event rate of ";
00940 oss << std::fixed << std::setprecision(1) << stats.fullSampleRate;
00941 oss << " events/sec.";
00942 logMessage_(oss.str());
00943 previous_run_duration_ = stats.fullDuration;
00944 }
00945
00946
00947
00948
00949 metricMan_.do_stop();
00950
00951 receiver_ptr_.reset(nullptr);
00952 sender_ptr_.reset(nullptr);
00953
00954 processing_fragments_.store(false);
00955 return 0;
00956 }
00957
00958 std::string artdaq::AggregatorCore::report(std::string const& which) const
00959 {
00960 if (which == "event_count")
00961 {
00962 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00963 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
00964 if (mqPtr.get() != 0)
00965 {
00966 return boost::lexical_cast<std::string>(mqPtr->getFullSampleCount());
00967 }
00968 else
00969 {
00970 return "-1";
00971 }
00972 }
00973
00974 if (which == "run_duration")
00975 {
00976
00977
00978 double duration = previous_run_duration_;
00979 if (processing_fragments_.load())
00980 {
00981 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00982 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
00983 if (mqPtr.get() != 0)
00984 {
00985 duration = mqPtr->getFullDuration();
00986 }
00987 }
00988 std::ostringstream oss;
00989 oss << std::fixed << std::setprecision(1) << duration;
00990 return oss.str();
00991 }
00992
00993 if (which == "file_size")
00994 {
00995 size_t latestFileSize = getLatestFileSize_();
00996 return boost::lexical_cast<std::string>(latestFileSize);
00997 }
00998
00999 if (which == "subrun_number")
01000 {
01001 if (event_store_ptr_.get() != nullptr)
01002 {
01003 return boost::lexical_cast<std::string>(event_store_ptr_->subrunID());
01004 }
01005 else
01006 {
01007 return "-1";
01008 }
01009 }
01010
01011 if (which == "incomplete_event_count")
01012 {
01013 if (event_store_ptr_ != nullptr)
01014 {
01015 return boost::lexical_cast<std::string>(event_store_ptr_->incompleteEventCount());
01016 }
01017 else
01018 {
01019 return "-1";
01020 }
01021 }
01022
01023
01024
01025
01026
01027
01028 std::string tmpString = name_ + " run number = ";
01029 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
01030 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
01031 return tmpString;
01032 }
01033
01034 std::string artdaq::AggregatorCore::register_monitor(fhicl::ParameterSet const& pset)
01035 {
01036 TLOG_DEBUG(name_) << "AggregatorCore::register_monitor called with argument \"" << pset.to_string() << "\"" << TLOG_ENDL;
01037 std::lock_guard<std::mutex> lock(dispatcher_transfers_mutex_);
01038
01039 try
01040 {
01041 auto transfer = MakeTransferPlugin(pset, "transfer_plugin", TransferInterface::Role::kSend);
01042
01043 for (auto& existing_transfer_ : dispatcher_transfers_)
01044 {
01045 if (existing_transfer_->uniqueLabel() == transfer->uniqueLabel())
01046 {
01047 std::stringstream errmsg;
01048 errmsg << "Attempt to register newly-created monitor with label \"" <<
01049 transfer->uniqueLabel() << "\" failed; a monitor with that label already exists";
01050 return errmsg.str();
01051 }
01052 }
01053
01054 dispatcher_transfers_.emplace_back(std::move(transfer));
01055
01056 TLOG_INFO(name_) << "Successfully registered monitor with label \"" << dispatcher_transfers_.back()->uniqueLabel() << "\"" << TLOG_ENDL;
01057
01058 new_transfers_++;
01059 }
01060 catch (...)
01061 {
01062 std::stringstream errmsg;
01063 errmsg << "Unable to create a Transfer plugin with the FHiCL code \"" << pset.to_string() << "\", a new monitor has not been registered";
01064 return errmsg.str();
01065 }
01066
01067 return "Success";
01068 }
01069
01070 std::string artdaq::AggregatorCore::unregister_monitor(std::string const& label)
01071 {
01072 TLOG_DEBUG(name_) << "AggregatorCore::unregister_monitor called with argument \"" << label << "\"" << TLOG_ENDL;
01073 std::lock_guard<std::mutex> lock(dispatcher_transfers_mutex_);
01074
01075 try
01076 {
01077 auto r_i_end = std::remove_if(dispatcher_transfers_.begin(),
01078 dispatcher_transfers_.end(),
01079 [label](const std::unique_ptr<TransferInterface>& transfer)
01080 {
01081 return transfer->uniqueLabel() == label;
01082 });
01083
01084 auto nfound = dispatcher_transfers_.end() - r_i_end;
01085
01086 TLOG_INFO(name_) << "Request from monitor with label \"" << label << "\" to unregister received" << TLOG_ENDL;
01087
01088 if (nfound == 1)
01089 {
01090 dispatcher_transfers_.pop_back();
01091 return "Success";
01092 }
01093 else if (nfound == 0)
01094 {
01095 std::stringstream errmsg;
01096 errmsg << "Warning in AggregatorCore::unregister_monitor: unable to find requested transfer plugin with "
01097 << "label \"" << label << "\"";
01098 TLOG_WARNING(name_) << errmsg.str() << TLOG_ENDL;
01099 return errmsg.str();
01100 }
01101 else
01102 {
01103 std::stringstream errmsg;
01104 errmsg << "Warning in AggregatorCore::unregister_monitor: found more than one (" << nfound <<
01105 ") transfer plugins with label \"" << label << "\", will unregister all of them";
01106 TLOG_WARNING(name_) << errmsg.str() << TLOG_ENDL;
01107 dispatcher_transfers_.erase(r_i_end, dispatcher_transfers_.end());
01108 return errmsg.str();
01109 }
01110 }
01111 catch (...)
01112 {
01113 std::stringstream errmsg;
01114 errmsg << "Unable to unregister transfer plugin with label \"" << label << "\"";
01115 return errmsg.str();
01116 }
01117
01118 return "Success";
01119 }
01120
01121
01122 size_t artdaq::AggregatorCore::getLatestFileSize_() const
01123 {
01124 if (disk_writing_directory_.size() == 0)
01125 {
01126 TLOG_DEBUG(name_) << "Latest file size = 0 (no directory)" << TLOG_ENDL;
01127 return 0;
01128 }
01129 BFS::path outputDir(disk_writing_directory_);
01130 BFS::directory_iterator endIter;
01131
01132 std::time_t latestFileTime = 0;
01133 size_t latestFileSize = 0;
01134 if (BFS::exists(outputDir) && BFS::is_directory(outputDir))
01135 {
01136 for (BFS::directory_iterator dirIter(outputDir); dirIter != endIter; ++dirIter)
01137 {
01138 BFS::path pathObj = dirIter->path();
01139 if (pathObj.filename().string().find("RootOutput") != std::string::npos &&
01140 pathObj.filename().string().find("root") != std::string::npos)
01141 {
01142 if (BFS::last_write_time(pathObj) >= latestFileTime)
01143 {
01144 latestFileTime = BFS::last_write_time(pathObj);
01145 latestFileSize = BFS::file_size(pathObj);
01146 }
01147 }
01148 }
01149 }
01150 time_t now = time(0);
01151 if ((now - latestFileTime) < 60)
01152 {
01153 TLOG_DEBUG(name_) << "Latest file size = "
01154 << latestFileSize << TLOG_ENDL;
01155 return latestFileSize;
01156 }
01157 else
01158 {
01159 TLOG_DEBUG(name_) << "Latest file size = 0 (too old)" << TLOG_ENDL;
01160 return 0;
01161 }
01162 }
01163
01164 bool artdaq::AggregatorCore::sendPauseAndResume_()
01165 {
01166 xmlrpc_c::clientSimple myClient;
01167 TLOG_INFO(name_) << "Starting automatic pause..." << TLOG_ENDL;
01168 for (size_t igrp = 0; igrp < xmlrpc_client_lists_.size(); ++igrp)
01169 {
01170 for (size_t idx = 0; idx < xmlrpc_client_lists_[igrp].size(); ++idx)
01171 {
01172 for (size_t iAttempt = 0; iAttempt < 5; ++iAttempt)
01173 {
01174 xmlrpc_c::value result;
01175 myClient.call((xmlrpc_client_lists_[igrp])[idx], "daq.pause", &result);
01176 std::string const resultString = xmlrpc_c::value_string(result);
01177 TLOG_DEBUG(name_) << "Pause: "
01178 << (xmlrpc_client_lists_[igrp])[idx]
01179 << " " << resultString << TLOG_ENDL;
01180 if (std::string::npos !=
01181 boost::algorithm::to_lower_copy(resultString).find("success"))
01182 {
01183 break;
01184 }
01185 else
01186 {
01187 sleep(2);
01188 TLOG_WARNING(name_) << "Retrying pause command to "
01189 << (xmlrpc_client_lists_[igrp])[idx]
01190 << " (" << resultString << ")" << TLOG_ENDL;
01191 }
01192 }
01193 }
01194 }
01195 TLOG_INFO(name_) << "Starting automatic resume..." << TLOG_ENDL;
01196 for (int igrp = (xmlrpc_client_lists_.size() - 1); igrp >= 0; --igrp)
01197 {
01198 for (size_t idx = 0; idx < xmlrpc_client_lists_[igrp].size(); ++idx)
01199 {
01200 for (size_t iAttempt = 0; iAttempt < 5; ++iAttempt)
01201 {
01202 xmlrpc_c::value result;
01203 myClient.call((xmlrpc_client_lists_[igrp])[idx], "daq.resume", &result);
01204 std::string const resultString = xmlrpc_c::value_string(result);
01205 TLOG_DEBUG(name_) << "Resume: "
01206 << (xmlrpc_client_lists_[igrp])[idx]
01207 << " " << resultString << TLOG_ENDL;
01208 if (std::string::npos !=
01209 boost::algorithm::to_lower_copy(resultString).find("success"))
01210 {
01211 break;
01212 }
01213 else
01214 {
01215 sleep(2);
01216 TLOG_WARNING(name_) << "Retrying resume command to "
01217 << (xmlrpc_client_lists_[igrp])[idx]
01218 << " (" << resultString << ")" << TLOG_ENDL;
01219 }
01220 }
01221 }
01222 }
01223 TLOG_INFO(name_) << "Done with automatic resume..." << TLOG_ENDL;
01224 system_pause_requested_.store(false);
01225 return true;
01226 }
01227
01228 void artdaq::AggregatorCore::logMessage_(std::string const& text)
01229 {
01230 if (is_data_logger_)
01231 {
01232 TLOG_INFO(name_) << text << TLOG_ENDL;
01233 }
01234 else
01235 {
01236 TLOG_DEBUG(name_) << text << TLOG_ENDL;
01237 }
01238 }
01239
01240 std::string artdaq::AggregatorCore::buildStatisticsString_()
01241 {
01242 std::ostringstream oss;
01243 double eventCount = 1.0;
01244 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
01245 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
01246 if (mqPtr.get() != 0)
01247 {
01248
01249 artdaq::MonitoredQuantityStats stats;
01250 mqPtr->getStats(stats);
01251 oss << "Input statistics: "
01252 << stats.recentSampleCount << " events received at "
01253 << stats.recentSampleRate << " events/sec, data rate = "
01254 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
01255 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
01256 << stats.recentDuration << " sec, min::max event size = "
01257 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
01258 / 1024.0 / 1024.0)
01259 << "::"
01260 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
01261 / 1024.0 / 1024.0)
01262 << " MB" << std::endl;
01263 eventCount = std::max(double(stats.recentSampleCount), 1.0);
01264 oss << "Average times per event: ";
01265 if (stats.recentSampleRate > 0.0)
01266 {
01267 oss << " elapsed time = "
01268 << (1.0 / stats.recentSampleRate) << " sec";
01269 }
01270 }
01271
01272
01273
01274
01275
01276
01277
01278 mqPtr = artdaq::StatisticsCollection::getInstance().
01279 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
01280 if (mqPtr.get() != 0)
01281 {
01282 oss << ", input wait time = "
01283 << (mqPtr->getRecentValueSum() / eventCount) << " sec";
01284 }
01285
01286 mqPtr = artdaq::StatisticsCollection::getInstance().
01287 getMonitoredQuantity(STORE_EVENT_WAIT_STAT_KEY);
01288 if (mqPtr.get() != 0)
01289 {
01290 artdaq::MonitoredQuantityStats stats;
01291 mqPtr->getStats(stats);
01292 oss << ", avg::max event store wait time = "
01293 << (stats.recentValueSum / eventCount)
01294 << "::" << stats.recentValueMax
01295 << " sec";
01296 }
01297
01298 mqPtr = artdaq::StatisticsCollection::getInstance().
01299 getMonitoredQuantity(SHM_COPY_TIME_STAT_KEY);
01300 if (mqPtr.get() != 0)
01301 {
01302 oss << ", shared memory copy time = "
01303 << (mqPtr->getRecentValueSum() / eventCount) << " sec";
01304 }
01305
01306 mqPtr = artdaq::StatisticsCollection::getInstance().
01307 getMonitoredQuantity(FILE_CHECK_TIME_STAT_KEY);
01308 if (mqPtr.get() != 0)
01309 {
01310 oss << ", file size test time = "
01311 << (mqPtr->getRecentValueSum() / eventCount) << " sec";
01312 }
01313
01314 return oss.str();
01315 }
01316
01317 void artdaq::AggregatorCore::sendMetrics_()
01318 {
01319
01320 double eventCount = 1.0;
01321 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
01322 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
01323 if (mqPtr.get() != 0)
01324 {
01325 artdaq::MonitoredQuantityStats stats;
01326 mqPtr->getStats(stats);
01327 eventCount = std::max(double(stats.recentSampleCount), 1.0);
01328 metricMan_.sendMetric("Event Rate", stats.recentSampleRate, "events/sec", 1, MetricMode::Average);
01329 metricMan_.sendMetric("Average Event Size", (stats.recentValueAverage * sizeof(artdaq::RawDataType)), "bytes/event", 2, MetricMode::Average);
01330 metricMan_.sendMetric("Data Rate", (stats.recentValueRate * sizeof(artdaq::RawDataType)), "bytes/sec", 2, MetricMode::Average);
01331 }
01332
01333
01334
01335
01336
01337
01338
01339 mqPtr = artdaq::StatisticsCollection::getInstance().
01340 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
01341 if (mqPtr.get() != 0)
01342 {
01343 metricMan_.sendMetric("Average Input Wait Time", (mqPtr->getRecentValueSum() / eventCount), "seconds/event", 3, MetricMode::Average);
01344 }
01345
01346 mqPtr = artdaq::StatisticsCollection::getInstance().
01347 getMonitoredQuantity(STORE_EVENT_WAIT_STAT_KEY);
01348 if (mqPtr.get() != 0)
01349 {
01350 metricMan_.sendMetric("Avg art Queue Wait Time", (mqPtr->getRecentValueSum() / eventCount), "seconds/event", 3, MetricMode::Average);
01351 }
01352
01353 mqPtr = artdaq::StatisticsCollection::getInstance().
01354 getMonitoredQuantity(SHM_COPY_TIME_STAT_KEY);
01355 if (mqPtr.get() != 0)
01356 {
01357 metricMan_.sendMetric("Avg Shared Memory Copy Time", (mqPtr->getRecentValueSum() / eventCount), "seconds/event", 4, MetricMode::Average);
01358 }
01359
01360 mqPtr = artdaq::StatisticsCollection::getInstance().
01361 getMonitoredQuantity(FILE_CHECK_TIME_STAT_KEY);
01362 if (mqPtr.get() != 0)
01363 {
01364 metricMan_.sendMetric("Average File Check Time", (mqPtr->getRecentValueSum() / eventCount), "seconds/event", 4, MetricMode::Average);
01365 }
01366 }