00001 #pragma GCC diagnostic push
00002 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
00003 #include "xmlrpc-c/client_simple.hpp"
00004 #pragma GCC diagnostic pop
00005
00006 #include <errno.h>
00007 #include <sstream>
00008 #include <iomanip>
00009 #include <bitset>
00010
00011 #include <boost/tokenizer.hpp>
00012 #include <boost/filesystem.hpp>
00013 #include <boost/algorithm/string.hpp>
00014 #include "art/Framework/Art/artapp.h"
00015 #include "cetlib/BasicPluginFactory.h"
00016
00017 #include "artdaq-core/Core/SimpleQueueReader.hh"
00018 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00019 #include "artdaq-core/Data/RawEvent.hh"
00020
00021 #include "artdaq/Application/AggregatorCore.hh"
00022 #include "artdaq/DAQrate/EventStore.hh"
00023 #include "artdaq/DAQrate/detail/FragCounter.hh"
00024 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
00025
00026
00027 namespace BFS = boost::filesystem;
00028
00029 const std::string artdaq::AggregatorCore::INPUT_EVENTS_STAT_KEY("AggregatorCoreInputEvents");
00030 const std::string artdaq::AggregatorCore::INPUT_WAIT_STAT_KEY("AggregatorCoreInputWaitTime");
00031 const std::string artdaq::AggregatorCore::STORE_EVENT_WAIT_STAT_KEY("AggregatorCoreStoreEventWaitTime");
00032 const std::string artdaq::AggregatorCore::SHM_COPY_TIME_STAT_KEY("AggregatorCoreShmCopyTime");
00033 const std::string artdaq::AggregatorCore::FILE_CHECK_TIME_STAT_KEY("AggregatorCoreFileCheckTime");
00034
00035 namespace artdaq
00036 {
00043 void display_bits(void* memstart, size_t nbytes, std::string sourcename)
00044 {
00045 std::stringstream bitstr;
00046 bitstr << "The " << nbytes << "-byte chunk of memory beginning at " << static_cast<void*>(memstart) << " is : ";
00047
00048 for (unsigned int i = 0; i < nbytes; i++)
00049 {
00050 if (i % 4 == 0)
00051 {
00052 bitstr << "\n";
00053 }
00054
00055 bitstr << std::bitset<8>(*((reinterpret_cast<uint8_t*>(memstart)) + i)) << " ";
00056 }
00057
00058 TLOG_DEBUG(sourcename) << bitstr.str() << TLOG_ENDL;
00059 }
00060 }
00061
00062
00063 artdaq::AggregatorCore::AggregatorCore(int rank, std::string name)
00064 : name_(name)
00065 , art_initialized_(false)
00066 , event_queue_(artdaq::getGlobalQueue(10))
00067 , stop_requested_(false)
00068 , local_pause_requested_(false)
00069 , processing_fragments_(false)
00070 , system_pause_requested_(false)
00071 , previous_run_duration_(-1.0)
00072 , new_transfers_(0)
00073 {
00074 TLOG_DEBUG(name_) << "Constructor" << TLOG_ENDL;
00075 stats_helper_.addMonitoredQuantityName(INPUT_EVENTS_STAT_KEY);
00076 stats_helper_.addMonitoredQuantityName(INPUT_WAIT_STAT_KEY);
00077 stats_helper_.addMonitoredQuantityName(STORE_EVENT_WAIT_STAT_KEY);
00078 stats_helper_.addMonitoredQuantityName(SHM_COPY_TIME_STAT_KEY);
00079 stats_helper_.addMonitoredQuantityName(FILE_CHECK_TIME_STAT_KEY);
00080 metricMan = &metricMan_;
00081 my_rank = rank;
00082 }
00083
00084 artdaq::AggregatorCore::~AggregatorCore()
00085 {
00086 TLOG_DEBUG(name_) << "Destructor" << TLOG_ENDL;
00087 }
00088
00089 bool artdaq::AggregatorCore::initialize(fhicl::ParameterSet const& pset)
00090 {
00091 init_string_ = pset.to_string();
00092 TLOG_DEBUG(name_) << "initialize method called with DAQ " << "ParameterSet = \"" << init_string_ << "\"." << TLOG_ENDL;
00093
00094
00095 fhicl::ParameterSet daq_pset;
00096 try
00097 {
00098 daq_pset = pset.get<fhicl::ParameterSet>("daq");
00099 }
00100 catch (...)
00101 {
00102 TLOG_ERROR(name_)
00103 << "Unable to find the DAQ parameters in the initialization "
00104 << "ParameterSet: \"" + pset.to_string() + "\"." << TLOG_ENDL;
00105 return false;
00106 }
00107 fhicl::ParameterSet agg_pset;
00108 try
00109 {
00110 agg_pset = daq_pset.get<fhicl::ParameterSet>("aggregator");
00111 data_pset_ = agg_pset;
00112 }
00113 catch (...)
00114 {
00115 TLOG_ERROR(name_)
00116 << "Unable to find the aggregator parameters in the DAQ "
00117 << "initialization ParameterSet: \"" + daq_pset.to_string() + "\"." << TLOG_ENDL;
00118 return false;
00119 }
00120 try
00121 {
00122 expected_events_per_bunch_ =
00123 agg_pset.get<size_t>("expected_events_per_bunch");
00124 }
00125 catch (...)
00126 {
00127 TLOG_ERROR(name_)
00128 << "The expected_events_per_bunch parameter was not specified "
00129 << "in the aggregator initialization PSet: \"" << pset.to_string()
00130 << "\"." << TLOG_ENDL;
00131 return false;
00132 }
00133
00134 enq_timeout_ = static_cast<detail::seconds>(agg_pset.get<size_t>("enq_timeout", 5.0));
00135
00136
00137
00138
00139
00140 is_data_logger_ = false;
00141 is_online_monitor_ = false;
00142 is_dispatcher_ = false;
00143 std::string metricsReportingInstanceName = "Data Logger";
00144 bool agtype_was_specified = false;
00145 if (!agtype_was_specified)
00146 {
00147 try
00148 {
00149 is_data_logger_ = agg_pset.get<bool>("is_data_logger");
00150 agtype_was_specified = true;
00151 }
00152 catch (...) {}
00153 }
00154 if (!agtype_was_specified)
00155 {
00156 try
00157 {
00158 is_online_monitor_ = agg_pset.get<bool>("is_online_monitor");
00159 metricsReportingInstanceName = "Online Monitor";
00160 agtype_was_specified = true;
00161 }
00162 catch (...) {}
00163 }
00164 if (!agtype_was_specified)
00165 {
00166 try
00167 {
00168 is_dispatcher_ = agg_pset.get<bool>("is_dispatcher");
00169 metricsReportingInstanceName = "Dispatcher";
00170 agtype_was_specified = true;
00171 }
00172 catch (...) {}
00173 }
00174
00175 if (!agtype_was_specified)
00176 {
00177 throw cet::exception("ConfigurationException", "You must specify one of is_data_logger, is_online_monitor or is_dispatcher");
00178 return false;
00179 }
00180 TLOG_DEBUG(name_) << "Rank " << my_rank
00181 << ", is_data_logger = " << is_data_logger_
00182 << ", is_online_monitor = " << is_online_monitor_
00183 << ", is_dispatcher = " << is_dispatcher_ << TLOG_ENDL;
00184
00185 disk_writing_directory_ = "";
00186 try
00187 {
00188 fhicl::ParameterSet output_pset =
00189 pset.get<fhicl::ParameterSet>("outputs");
00190 fhicl::ParameterSet normalout_pset =
00191 output_pset.get<fhicl::ParameterSet>("normalOutput");
00192
00193 if (!normalout_pset.is_empty())
00194 {
00195 std::string filename = normalout_pset.get<std::string>("fileName", "");
00196 if (filename.size() > 0)
00197 {
00198 size_t pos = filename.rfind("/");
00199 if (pos != std::string::npos)
00200 {
00201 disk_writing_directory_ = filename.substr(0, pos);
00202 }
00203 }
00204 else
00205 {
00206 TLOG_WARNING(name_) << "Problem finding \"fileName\" parameter in \"normalOutput\" RootOutput module FHiCL code" << TLOG_ENDL;
00207 }
00208 }
00209 }
00210 catch (...) {}
00211
00212 std::string xmlrpcClientString =
00213 agg_pset.get<std::string>("xmlrpc_client_list", "");
00214 if (xmlrpcClientString.size() > 0)
00215 {
00216 xmlrpc_client_lists_.clear();
00217 boost::char_separator<char> sep1(";");
00218 boost::tokenizer<boost::char_separator<char>>
00219 primaryTokens(xmlrpcClientString, sep1);
00220 boost::tokenizer<boost::char_separator<char>>::iterator iter1;
00221 boost::tokenizer<boost::char_separator<char>>::iterator
00222 endIter1 = primaryTokens.end();
00223 for (iter1 = primaryTokens.begin(); iter1 != endIter1; ++iter1)
00224 {
00225 boost::char_separator<char> sep2(",");
00226 boost::tokenizer<boost::char_separator<char>>
00227 secondaryTokens(*iter1, sep2);
00228 boost::tokenizer<boost::char_separator<char>>::iterator iter2;
00229 boost::tokenizer<boost::char_separator<char>>::iterator
00230 endIter2 = secondaryTokens.end();
00231 int clientGroup = -1;
00232 std::string url = "";
00233 int loopCount = 0;
00234 for (iter2 = secondaryTokens.begin(); iter2 != endIter2; ++iter2)
00235 {
00236 switch (loopCount)
00237 {
00238 case 0:
00239 url = *iter2;
00240 break;
00241 case 1:
00242 try
00243 {
00244 clientGroup = boost::lexical_cast<int>(*iter2);
00245 }
00246 catch (...) {}
00247 break;
00248 default:
00249 TLOG_WARNING(name_)
00250 << "Unexpected XMLRPC client list element, index = "
00251 << loopCount << ", value = \"" << *iter2 << "\"" << TLOG_ENDL;
00252 }
00253 ++loopCount;
00254 }
00255 if (clientGroup >= 0 && url.size() > 0)
00256 {
00257 int elementsNeeded = clientGroup + 1 - ((int)xmlrpc_client_lists_.size());
00258 for (int idx = 0; idx < elementsNeeded; ++idx)
00259 {
00260 std::vector<std::string> tmpVec;
00261 xmlrpc_client_lists_.push_back(tmpVec);
00262 }
00263 xmlrpc_client_lists_[clientGroup].push_back(url);
00264 }
00265 }
00266 }
00267 double fileSizeMB = agg_pset.get<double>("subrun_size_MB", 0);
00268 file_close_threshold_bytes_ = ((size_t)fileSizeMB * 1024.0 * 1024.0);
00269 file_close_timeout_secs_ = agg_pset.get<time_t>("subrun_duration", 0);
00270 file_close_event_count_ = agg_pset.get<size_t>("subrun_event_count", 0);
00271
00272 inrun_recv_timeout_usec_ = agg_pset.get<size_t>("inrun_recv_timeout_usec", 100000);
00273 endrun_recv_timeout_usec_ = agg_pset.get<size_t>("endrun_recv_timeout_usec", 20000000);
00274 pause_recv_timeout_usec_ = agg_pset.get<size_t>("pause_recv_timeout_usec", 3000000);
00275
00276 onmon_event_prescale_ = agg_pset.get<size_t>("onmon_event_prescale", 1);
00277
00278 filesize_check_interval_seconds_ = agg_pset.get<int32_t>("filesize_check_interval_seconds", 20);
00279 filesize_check_interval_events_ = agg_pset.get<int32_t>("filesize_check_interval_events", 20);
00280
00281
00282 stats_helper_.createCollectors(agg_pset, 50, 20.0, 60.0, INPUT_EVENTS_STAT_KEY);
00283
00284
00285 fhicl::ParameterSet metric_pset;
00286
00287 try
00288 {
00289 metric_pset = daq_pset.get<fhicl::ParameterSet>("metrics");
00290 }
00291 catch (...) {}
00292
00293 if (metric_pset.is_empty())
00294 {
00295 TLOG_INFO(name_) << "No metric plugins appear to be defined" << TLOG_ENDL;
00296 }
00297 try
00298 {
00299 metricMan_.initialize(metric_pset, metricsReportingInstanceName);
00300 }
00301 catch (...)
00302 {
00303 ExceptionHandler(ExceptionHandlerRethrow::no,
00304 "Error loading metrics in AggregatorCore::initialize()");
00305 }
00306
00307 if (event_store_ptr_ == nullptr)
00308 {
00309 artdaq::EventStore::ART_CFGSTRING_FCN* reader = &artapp_string_config;
00310 size_t desired_events_per_bunch = expected_events_per_bunch_;
00311 if (is_online_monitor_ || is_dispatcher_)
00312 {
00313 desired_events_per_bunch = 1;
00314 }
00315 TRACE(36, "Creating EventStore and Starting art thread");
00316 event_store_ptr_.reset(new artdaq::EventStore(agg_pset, desired_events_per_bunch, 1,
00317 init_string_, reader));
00318 TRACE(36, "Done Creating EventStore");
00319 event_store_ptr_->setSeqIDModulus(desired_events_per_bunch);
00320 fhicl::ParameterSet tmp = pset;
00321 tmp.erase("daq");
00322 previous_pset_ = tmp;
00323 }
00324 else
00325 {
00326 fhicl::ParameterSet tmp = pset;
00327 tmp.erase("daq");
00328 if (tmp != previous_pset_)
00329 {
00330 TLOG_ERROR(name_)
00331 << "The art configuration can not be altered after art "
00332 << "has been configured." << TLOG_ENDL;
00333 return false;
00334 }
00335 }
00336
00337 return true;
00338 }
00339
00340 bool artdaq::AggregatorCore::start(art::RunID id)
00341 {
00342 event_count_in_run_ = 0;
00343 event_count_in_subrun_ = 0;
00344 subrun_start_time_ = time(0);
00345 stats_helper_.resetStatistics();
00346 previous_run_duration_ = -1.0;
00347
00348 stop_requested_.store(false);
00349 local_pause_requested_.store(false);
00350 run_id_ = id;
00351 metricMan_.do_start();
00352 event_store_ptr_->startRun(run_id_.run());
00353
00354 logMessage_("Started run " + boost::lexical_cast<std::string>(run_id_.run()));
00355 return true;
00356 }
00357
00358 bool artdaq::AggregatorCore::stop()
00359 {
00360 logMessage_("Stopping run " + boost::lexical_cast<std::string>(run_id_.run()) +
00361 ", " + boost::lexical_cast<std::string>(event_count_in_run_) +
00362 " events received so far.");
00363
00364
00365
00366
00367 stop_requested_.store(true);
00368 return true;
00369 }
00370
00371 bool artdaq::AggregatorCore::pause()
00372 {
00373 logMessage_("Pausing run " + boost::lexical_cast<std::string>(run_id_.run()) +
00374 ", " + boost::lexical_cast<std::string>(event_count_in_run_) +
00375 " events received so far.");
00376
00377
00378
00379
00380 local_pause_requested_.store(true);
00381 return true;
00382 }
00383
00384 bool artdaq::AggregatorCore::resume()
00385 {
00386 event_count_in_subrun_ = 0;
00387 subrun_start_time_ = time(0);
00388 local_pause_requested_.store(false);
00389
00390 logMessage_("Resuming run " + boost::lexical_cast<std::string>(run_id_.run()));
00391 metricMan_.do_start();
00392 event_store_ptr_->startSubrun();
00393 return true;
00394 }
00395
00396 bool artdaq::AggregatorCore::shutdown()
00397 {
00398 int readerReturnValue;
00399 bool endSucceeded = false;
00400 int attemptsToEnd = 1;
00401 endSucceeded = event_store_ptr_->endOfData(readerReturnValue);
00402 while (!endSucceeded && attemptsToEnd < 3)
00403 {
00404 ++attemptsToEnd;
00405 TLOG_DEBUG(name_) << "Retrying EventStore::endOfData()" << TLOG_ENDL;
00406 endSucceeded = event_store_ptr_->endOfData(readerReturnValue);
00407 }
00408 metricMan_.shutdown();
00409
00410 return endSucceeded;
00411 }
00412
00413 bool artdaq::AggregatorCore::soft_initialize(fhicl::ParameterSet const& pset)
00414 {
00415 TLOG_DEBUG(name_) << "soft_initialize method called with DAQ "
00416 << "ParameterSet = \"" << pset.to_string()
00417 << "\"." << TLOG_ENDL;
00418 return true;
00419 }
00420
00421 bool artdaq::AggregatorCore::reinitialize(fhicl::ParameterSet const& pset)
00422 {
00423 TLOG_DEBUG(name_) << "reinitialize method called with DAQ "
00424 << "ParameterSet = \"" << pset.to_string()
00425 << "\"." << TLOG_ENDL;
00426 return true;
00427 }
00428
00429 size_t artdaq::AggregatorCore::process_fragments()
00430 {
00431 processing_fragments_.store(true);
00432
00433 size_t eodFragmentsReceived = 0;
00434 bool process_fragments = true;
00435 int senderSlot;
00436 detail::FragCounter fragments_received;
00437 detail::FragCounter fragments_sent;
00438 artdaq::FragmentPtr endSubRunMsg(nullptr);
00439 time_t last_filesize_check_time = subrun_start_time_;
00440
00441
00442 if (true)
00443 {
00444 receiver_ptr_.reset(new artdaq::DataReceiverManager(data_pset_));
00445 receiver_ptr_->start_threads();
00446 }
00447
00448 if (is_data_logger_ && data_pset_.has_key("destinations"))
00449 {
00450 sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
00451
00452 if (sender_ptr_->destinationCount() == 0) {
00453 sender_ptr_.reset(nullptr);
00454 }
00455 }
00456
00457 TLOG_DEBUG(name_) << "Waiting for first fragment." << TLOG_ENDL;
00458
00459 artdaq::MonitoredQuantityStats::TIME_POINT_T startTime;
00460 while (process_fragments)
00461 {
00462 artdaq::FragmentPtr fragmentPtr(new artdaq::Fragment);
00463
00464 size_t recvTimeout = inrun_recv_timeout_usec_;
00465 if (stop_requested_.load()) { recvTimeout = endrun_recv_timeout_usec_; }
00466 else if (local_pause_requested_.load()) { recvTimeout = pause_recv_timeout_usec_; }
00467
00468 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00469
00470
00471 fragmentPtr = receiver_ptr_->recvFragment(senderSlot, recvTimeout);
00472
00473 stats_helper_.addSample(INPUT_WAIT_STAT_KEY,
00474 (artdaq::MonitoredQuantity::getCurrentTime() - startTime));
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502 if (senderSlot == artdaq::TransferInterface::RECV_TIMEOUT)
00503 {
00504 if (stop_requested_.load() &&
00505 recvTimeout == endrun_recv_timeout_usec_)
00506 {
00507 if (endSubRunMsg != nullptr)
00508 {
00509 TLOG_WARNING(name_)
00510 << "Timeout occurred in attempt to receive data, but as a stop has been requested, will forcibly end the run." << TLOG_ENDL;
00511 event_store_ptr_->flushData();
00512 artdaq::RawEvent_ptr subRunEvent(new artdaq::RawEvent(run_id_.run(), 1, 0));
00513 subRunEvent->insertFragment(std::move(endSubRunMsg));
00514
00515 bool enqStatus = event_queue_.enqTimedWait(subRunEvent, enq_timeout_);
00516 if (!enqStatus)
00517 {
00518 TLOG_ERROR(name_) << "Attempt to send EndOfSubRun fragment to art timed out after " <<
00519 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00520 }
00521 }
00522 else
00523 {
00524 if (event_count_in_subrun_ > 0)
00525 {
00526 TLOG_ERROR(name_)
00527 << "Timeout receiving data after stop request, and the EndOfSubRun fragment isn't available to send to art; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00528 }
00529 else
00530 {
00531 std::string msg("Timeout receiving data after stop request, and the EndOfSubRun fragment isn't available to send to art;");
00532 msg.append("DAQ may need to be returned to the \"Stopped\" state before further datataking");
00533 logMessage_(msg);
00534 }
00535 }
00536 process_fragments = false;
00537 }
00538 else if (local_pause_requested_.load() &&
00539 recvTimeout == pause_recv_timeout_usec_)
00540 {
00541 if (endSubRunMsg != nullptr)
00542 {
00543 TLOG_WARNING(name_)
00544 << "Timeout occurred in attempt to receive data, but as a pause has been requested, will forcibly pause the run." << TLOG_ENDL;
00545 event_store_ptr_->flushData();
00546 artdaq::RawEvent_ptr subRunEvent(new artdaq::RawEvent(run_id_.run(), 1, 0));
00547 subRunEvent->insertFragment(std::move(endSubRunMsg));
00548
00549 bool enqStatus = event_queue_.enqTimedWait(subRunEvent, enq_timeout_);
00550 if (!enqStatus)
00551 {
00552 TLOG_ERROR(name_) << "Attempt to send EndOfSubRun fragment to art timed out after " <<
00553 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00554 }
00555 }
00556 else
00557 {
00558 TLOG_ERROR(name_) <<
00559 "Timeout receiving data after pause request, and the EndOfSubRun fragment isn't available to send to art; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00560 }
00561 process_fragments = false;
00562 }
00563
00564 continue;
00565 }
00566 else if (!fragmentPtr)
00567 {
00568 TLOG_ERROR(name_) << "Received invalid fragment from " << senderSlot << ". This is usually the case when a timeout has occurred, but sender was not set to RECV_TIMEOUT as expected." << TLOG_ENDL;
00569 continue;
00570 }
00571 if (!receiver_ptr_->enabled_sources().count(senderSlot))
00572 {
00573 TLOG_ERROR(name_)
00574 << "Invalid senderSlot received from recvFragment: "
00575 << senderSlot << TLOG_ENDL;
00576 continue;
00577 }
00578 fragments_received.incSlot(senderSlot);
00579 if (artdaq::Fragment::isSystemFragmentType(fragmentPtr->type()) &&
00580 fragmentPtr->type() != artdaq::Fragment::DataFragmentType)
00581 {
00582 TLOG_DEBUG(name_)
00583 << "Sender slot = " << senderSlot
00584 << ", fragment type = " << static_cast<int>(fragmentPtr->type())
00585 << ", sequence ID = " << fragmentPtr->sequenceID() << TLOG_ENDL;
00586 }
00587
00588
00589 if (fragmentPtr->type() == artdaq::Fragment::InvalidFragmentType)
00590 {
00591 size_t fragSize = fragmentPtr->size() * sizeof(artdaq::RawDataType);
00592 TLOG_ERROR(name_) << "Fragment received with type of "
00593 << "INVALID. Size = " << fragSize
00594 << ", sequence ID = " << fragmentPtr->sequenceID()
00595 << ", fragment ID = " << fragmentPtr->fragmentID()
00596 << ", and type = " << static_cast<int>(fragmentPtr->type()) << TLOG_ENDL;
00597 continue;
00598 }
00599
00600 if (artdaq::Fragment::isUserFragmentType(fragmentPtr->type()) ||
00601 fragmentPtr->type() == artdaq::Fragment::DataFragmentType)
00602 {
00603 ++event_count_in_run_;
00604 ++event_count_in_subrun_;
00605 if (event_count_in_run_ == 1)
00606 {
00607 logMessage_("Received event " +
00608 boost::lexical_cast<std::string>(event_count_in_run_) +
00609 " with sequence id " +
00610 boost::lexical_cast<std::string>(fragmentPtr->sequenceID()) +
00611 ".");
00612 }
00613 stats_helper_.addSample(INPUT_EVENTS_STAT_KEY, fragmentPtr->size());
00614 if (stats_helper_.readyToReport(event_count_in_run_))
00615 {
00616 std::string statString = buildStatisticsString_();
00617 logMessage_(statString);
00618 logMessage_("Received event " +
00619 boost::lexical_cast<std::string>(event_count_in_run_) +
00620 " with sequence id " +
00621 boost::lexical_cast<std::string>(fragmentPtr->sequenceID()) +
00622 " (run " +
00623 boost::lexical_cast<std::string>(run_id_.run()) +
00624 ", subrun " +
00625 boost::lexical_cast<std::string>(event_store_ptr_->subrunID()) +
00626 ").");
00627 }
00628 }
00629 if (stats_helper_.statsRollingWindowHasMoved()) { sendMetrics_(); }
00630
00631 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00632
00633 if (is_data_logger_ && fragmentPtr->type() == artdaq::Fragment::DataFragmentType
00634 && (event_count_in_run_ % onmon_event_prescale_) == 0 && sender_ptr_)
00635 {
00636 try
00637 {
00638 auto fragCopy = *fragmentPtr;
00639 sender_ptr_->sendFragment(std::move(fragCopy));
00640 }
00641 catch (...)
00642 {
00643 ExceptionHandler(ExceptionHandlerRethrow::no,
00644 "Exception thrown during data logger copy of event to dispatcher");
00645 }
00646 }
00647 else if (is_dispatcher_)
00648 {
00649 if (fragmentPtr->type() != artdaq::Fragment::EndOfDataFragmentType)
00650 {
00651 if (fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
00652 {
00653 init_fragment_ptr_ = std::make_unique<artdaq::Fragment>(*fragmentPtr);
00654 }
00655
00656 std::lock_guard<std::mutex> lock(dispatcher_transfers_mutex_);
00657
00658 if (new_transfers_ == 0)
00659 {
00660
00661 if (dispatcher_transfers_.size() > 0 && fragmentPtr->sequenceID() % 100 == 0)
00662 {
00663 TLOG_DEBUG(name_) << "Dispatcher: broadcasting seqID = " << fragmentPtr->sequenceID() << ", type = " <<
00664 static_cast<size_t>(fragmentPtr->type()) << " to " << dispatcher_transfers_.size()
00665 << " registered monitors" << TLOG_ENDL;
00666 }
00667 for (auto& transfer : dispatcher_transfers_)
00668 {
00669 transfer->copyFragment(*fragmentPtr, 0);
00670 }
00671 }
00672 else
00673 {
00674 for (size_t i_q = dispatcher_transfers_.size() - new_transfers_; i_q < dispatcher_transfers_.size(); ++i_q)
00675 {
00676 TLOG_INFO(name_) << "Copying out init fragment, type " << static_cast<int>(init_fragment_ptr_->type()) <<
00677 ", size " << init_fragment_ptr_->sizeBytes() << TLOG_ENDL;
00678 dispatcher_transfers_[i_q]->copyFragment(*init_fragment_ptr_, 500000);
00679 }
00680 new_transfers_ = 0;
00681 }
00682 }
00683 }
00684
00685 stats_helper_.addSample(SHM_COPY_TIME_STAT_KEY,
00686 (artdaq::MonitoredQuantity::getCurrentTime() - startTime));
00687
00688
00689
00690 artdaq::Fragment::sequence_id_t seq = fragmentPtr->sequenceID();
00691 TRACE(21, "%s::process_fragments seq=%lu isLogger=%d type=%d"
00692 , name_.c_str(), seq, is_data_logger_, fragmentPtr->type());
00693 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00694 if (!art_initialized_)
00695 {
00696
00697
00698 if (fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
00699 {
00700 TLOG_DEBUG(name_) << "Init" << TLOG_ENDL;
00701
00702 if (is_data_logger_ && sender_ptr_)
00703 {
00704 auto fragCopy = *fragmentPtr;
00705 sender_ptr_->sendFragment(std::move(fragCopy));
00706
00707 }
00708
00709 artdaq::RawEvent_ptr initEvent(new artdaq::RawEvent(run_id_.run(), 1, fragmentPtr->sequenceID()));
00710 initEvent->insertFragment(std::move(fragmentPtr));
00711
00712 bool enqStatus = event_queue_.enqTimedWait(initEvent, enq_timeout_);
00713
00714 if (!enqStatus)
00715 {
00716 TLOG_ERROR(name_) << "Attempt to send Init event to art timed out after " <<
00717 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00718 }
00719 art_initialized_ = true;
00720 }
00721 else
00722 {
00723 TLOG_ERROR(name_) << "Didn't receive an Init event with which to initialize art; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00724 }
00725 }
00726 else
00727 {
00728
00729
00730 if (fragmentPtr->type() == artdaq::Fragment::DataFragmentType)
00731 {
00732 if (is_data_logger_)
00733 {
00734 artdaq::FragmentPtr rejectedFragment;
00735 auto seqId = fragmentPtr->sequenceID();
00736 bool try_again = true;
00737 while (try_again)
00738 {
00739 auto ret = event_store_ptr_->insert(std::move(fragmentPtr), rejectedFragment);
00740 if (ret == EventStore::EventStoreInsertResult::SUCCESS)
00741 {
00742 receiver_ptr_->unsuppressAll();
00743 try_again = false;
00744 }
00745 else if (ret == EventStore::EventStoreInsertResult::SUCCESS_STOREFULL)
00746 {
00747 try_again = false;
00748 }
00749 else if (stop_requested_.load())
00750 {
00751 try_again = false;
00752 process_fragments = false;
00753 receiver_ptr_->reject_fragment(senderSlot, std::move(rejectedFragment));
00754 TLOG_WARNING(name_)
00755 << "Unable to process event " << seqId
00756 << " because of back-pressure - forcibly ending the run." << TLOG_ENDL;
00757 }
00758 else if (local_pause_requested_.load())
00759 {
00760 try_again = false;
00761 process_fragments = false;
00762 receiver_ptr_->reject_fragment(senderSlot, std::move(rejectedFragment));
00763 TLOG_WARNING(name_)
00764 << "Unable to process event " << seqId
00765 << " because of back-pressure - forcibly pausing the run." << TLOG_ENDL;
00766 }
00767 else if (ret == EventStore::EventStoreInsertResult::REJECT_QUEUEFULL)
00768 {
00769 fragmentPtr = std::move(rejectedFragment);
00770 TLOG_WARNING(name_)
00771 << "Unable to process event " << seqId
00772 << " because of back-pressure from art - retrying..." << TLOG_ENDL;
00773 }
00774 else
00775 {
00776 try_again = false;
00777 receiver_ptr_->reject_fragment(senderSlot, std::move(rejectedFragment));
00778 TLOG_WARNING(name_)
00779 << "Unable to process event " << seqId
00780 << " because the EventStore has reached the maximum number of incomplete bunches." << std::endl
00781 << " Will retry when the EventStore is ready for new events." << TLOG_ENDL;
00782 }
00783 }
00784 }
00785 else
00786 {
00787 event_store_ptr_->insert(std::move(fragmentPtr), false);
00788 }
00789 }
00790 else if (fragmentPtr->type() == artdaq::Fragment::EndOfSubrunFragmentType)
00791 {
00792 if (is_data_logger_ && sender_ptr_)
00793 {
00794 auto fragCopy = *fragmentPtr;
00795 sender_ptr_->sendFragment(std::move(fragCopy));
00796 }
00797 else if (is_dispatcher_)
00798 {
00799 for (auto& transfer : dispatcher_transfers_)
00800 {
00801 transfer->copyFragment(*fragmentPtr, 0);
00802 }
00803 }
00804
00805
00806
00807
00808
00809 endSubRunMsg = std::move(fragmentPtr);
00810 }
00811 else if (fragmentPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
00812 {
00813 eodFragmentsReceived++;
00814
00815
00816
00817 fragments_sent.setSlot(senderSlot, *fragmentPtr->dataBegin() + 1);
00818 }
00819 }
00820 float delta = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
00821 stats_helper_.addSample(STORE_EVENT_WAIT_STAT_KEY, delta);
00822 TRACE((delta > 3.0) ? 0 : 22, "%s::process_fragments seq=%lu isLogger=%d delta=%f start=%f"
00823 , name_.c_str(), seq, is_data_logger_, delta, startTime);
00824
00825
00826 startTime = artdaq::MonitoredQuantity::getCurrentTime();
00827 if (is_data_logger_ && disk_writing_directory_.size() > 0 &&
00828 !stop_requested_.load() && !system_pause_requested_.load())
00829 {
00830 bool threshold_reached = false;
00831 if (file_close_event_count_ > 0 &&
00832 event_count_in_subrun_ >= file_close_event_count_)
00833 {
00834 threshold_reached = true;
00835 }
00836 else
00837 {
00838 time_t now = time(0);
00839 if (file_close_timeout_secs_ > 0 &&
00840 (now - subrun_start_time_) >= file_close_timeout_secs_)
00841 {
00842 threshold_reached = true;
00843 }
00844 else
00845 {
00846 if (filesize_check_interval_seconds_ > 0 &&
00847 filesize_check_interval_events_ > 0 &&
00848 (now - last_filesize_check_time) >= filesize_check_interval_seconds_ &&
00849 (event_count_in_run_ % filesize_check_interval_events_) == 0)
00850 {
00851 if (file_close_threshold_bytes_ > 0 &&
00852 getLatestFileSize_() >= file_close_threshold_bytes_)
00853 {
00854 threshold_reached = true;
00855 }
00856 last_filesize_check_time = now;
00857 }
00858 }
00859 }
00860 if (threshold_reached)
00861 {
00862 system_pause_requested_.store(true);
00863 if (pause_thread_.get() != 0)
00864 {
00865 pause_thread_->join();
00866 }
00867 TLOG_DEBUG(name_) << "Starting sendPauseAndResume thread "
00868 << ", event count in subrun = "
00869 << event_count_in_subrun_ << TLOG_ENDL;
00870 pause_thread_.reset(new std::thread(&AggregatorCore::sendPauseAndResume_, this));
00871 }
00872 }
00873 stats_helper_.addSample(FILE_CHECK_TIME_STAT_KEY,
00874 (artdaq::MonitoredQuantity::getCurrentTime() - startTime));
00875
00876
00877
00878
00879
00880
00881 size_t source_count = 0;
00882 if (is_data_logger_) source_count = receiver_ptr_->enabled_sources().size();
00883 else source_count = 1;
00884
00885 if (eodFragmentsReceived >= source_count && endSubRunMsg != nullptr)
00886 {
00887 bool fragmentsOutstanding = false;
00888 if (is_data_logger_)
00889 {
00890 for (auto& i : receiver_ptr_->enabled_sources())
00891 {
00892 if (fragments_received[i] != fragments_sent[i])
00893 {
00894 fragmentsOutstanding = true;
00895 break;
00896 }
00897 }
00898 }
00899
00900 if (!fragmentsOutstanding)
00901 {
00902 event_store_ptr_->flushData();
00903 artdaq::RawEvent_ptr subRunEvent(new artdaq::RawEvent(run_id_.run(), 1, 0));
00904 subRunEvent->insertFragment(std::move(endSubRunMsg));
00905
00906 bool enqStatus = event_queue_.enqTimedWait(subRunEvent, enq_timeout_);
00907
00908 if (!enqStatus)
00909 {
00910 TLOG_ERROR(name_) << "All data appears to have been received but attempt to send EndOfSubRun fragment to art timed out after " <<
00911 enq_timeout_.count() << " seconds; DAQ may need to be returned to the \"Stopped\" state before further datataking" << TLOG_ENDL;
00912 }
00913 process_fragments = false;
00914 }
00915 else
00916 {
00917 TLOG_WARNING(name_) << "EndOfSubRun fragment and all EndOfData fragments received but more data expected" << TLOG_ENDL;
00918 }
00919 }
00920 }
00921
00922 logMessage_("Subrun " +
00923 boost::lexical_cast<std::string>(event_store_ptr_->subrunID()) +
00924 " in run " + boost::lexical_cast<std::string>(run_id_.run()) +
00925 " has ended. There were " +
00926 boost::lexical_cast<std::string>(event_count_in_subrun_) +
00927 " events in this subrun, and there have been " +
00928 boost::lexical_cast<std::string>(event_count_in_run_) +
00929 " events so far in this run.");
00930
00931 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00932 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
00933 if (mqPtr.get() != 0)
00934 {
00935 artdaq::MonitoredQuantityStats stats;
00936 mqPtr->getStats(stats);
00937 std::ostringstream oss;
00938 oss << "Run " << run_id_.run() << " has an overall event rate of ";
00939 oss << std::fixed << std::setprecision(1) << stats.fullSampleRate;
00940 oss << " events/sec.";
00941 logMessage_(oss.str());
00942 previous_run_duration_ = stats.fullDuration;
00943 }
00944
00945
00946
00947
00948 metricMan_.do_stop();
00949
00950 receiver_ptr_.reset(nullptr);
00951 sender_ptr_.reset(nullptr);
00952
00953 processing_fragments_.store(false);
00954 return 0;
00955 }
00956
00957 std::string artdaq::AggregatorCore::report(std::string const& which) const
00958 {
00959 if (which == "event_count")
00960 {
00961 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00962 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
00963 if (mqPtr.get() != 0)
00964 {
00965 return boost::lexical_cast<std::string>(mqPtr->getFullSampleCount());
00966 }
00967 else
00968 {
00969 return "-1";
00970 }
00971 }
00972
00973 if (which == "run_duration")
00974 {
00975
00976
00977 double duration = previous_run_duration_;
00978 if (processing_fragments_.load())
00979 {
00980 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
00981 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
00982 if (mqPtr.get() != 0)
00983 {
00984 duration = mqPtr->getFullDuration();
00985 }
00986 }
00987 std::ostringstream oss;
00988 oss << std::fixed << std::setprecision(1) << duration;
00989 return oss.str();
00990 }
00991
00992 if (which == "file_size")
00993 {
00994 size_t latestFileSize = getLatestFileSize_();
00995 return boost::lexical_cast<std::string>(latestFileSize);
00996 }
00997
00998 if (which == "subrun_number")
00999 {
01000 if (event_store_ptr_.get() != nullptr)
01001 {
01002 return boost::lexical_cast<std::string>(event_store_ptr_->subrunID());
01003 }
01004 else
01005 {
01006 return "-1";
01007 }
01008 }
01009
01010 if (which == "incomplete_event_count")
01011 {
01012 if (event_store_ptr_ != nullptr)
01013 {
01014 return boost::lexical_cast<std::string>(event_store_ptr_->incompleteEventCount());
01015 }
01016 else
01017 {
01018 return "-1";
01019 }
01020 }
01021
01022
01023
01024
01025
01026
01027 std::string tmpString = name_ + " run number = ";
01028 tmpString.append(boost::lexical_cast<std::string>(run_id_.run()));
01029 tmpString.append(". Command=\"" + which + "\" is not currently supported.");
01030 return tmpString;
01031 }
01032
01033 std::string artdaq::AggregatorCore::register_monitor(fhicl::ParameterSet const& pset)
01034 {
01035 TLOG_DEBUG(name_) << "AggregatorCore::register_monitor called with argument \"" << pset.to_string() << "\"" << TLOG_ENDL;
01036 std::lock_guard<std::mutex> lock(dispatcher_transfers_mutex_);
01037
01038 try
01039 {
01040 auto transfer = MakeTransferPlugin(pset, "transfer_plugin", TransferInterface::Role::kSend);
01041
01042 for (auto& existing_transfer_ : dispatcher_transfers_)
01043 {
01044 if (existing_transfer_->uniqueLabel() == transfer->uniqueLabel())
01045 {
01046 std::stringstream errmsg;
01047 errmsg << "Attempt to register newly-created monitor with label \"" <<
01048 transfer->uniqueLabel() << "\" failed; a monitor with that label already exists";
01049 return errmsg.str();
01050 }
01051 }
01052
01053 dispatcher_transfers_.emplace_back(std::move(transfer));
01054
01055 TLOG_INFO(name_) << "Successfully registered monitor with label \"" << dispatcher_transfers_.back()->uniqueLabel() << "\"" << TLOG_ENDL;
01056
01057 new_transfers_++;
01058 }
01059 catch (...)
01060 {
01061 std::stringstream errmsg;
01062 errmsg << "Unable to create a Transfer plugin with the FHiCL code \"" << pset.to_string() << "\", a new monitor has not been registered";
01063 return errmsg.str();
01064 }
01065
01066 return "Success";
01067 }
01068
01069 std::string artdaq::AggregatorCore::unregister_monitor(std::string const& label)
01070 {
01071 TLOG_DEBUG(name_) << "AggregatorCore::unregister_monitor called with argument \"" << label << "\"" << TLOG_ENDL;
01072 std::lock_guard<std::mutex> lock(dispatcher_transfers_mutex_);
01073
01074 try
01075 {
01076 auto r_i_end = std::remove_if(dispatcher_transfers_.begin(),
01077 dispatcher_transfers_.end(),
01078 [label](const std::unique_ptr<TransferInterface>& transfer)
01079 {
01080 return transfer->uniqueLabel() == label;
01081 });
01082
01083 auto nfound = dispatcher_transfers_.end() - r_i_end;
01084
01085 TLOG_INFO(name_) << "Request from monitor with label \"" << label << "\" to unregister received" << TLOG_ENDL;
01086
01087 if (nfound == 1)
01088 {
01089 dispatcher_transfers_.pop_back();
01090 return "Success";
01091 }
01092 else if (nfound == 0)
01093 {
01094 std::stringstream errmsg;
01095 errmsg << "Warning in AggregatorCore::unregister_monitor: unable to find requested transfer plugin with "
01096 << "label \"" << label << "\"";
01097 TLOG_WARNING(name_) << errmsg.str() << TLOG_ENDL;
01098 return errmsg.str();
01099 }
01100 else
01101 {
01102 std::stringstream errmsg;
01103 errmsg << "Warning in AggregatorCore::unregister_monitor: found more than one (" << nfound <<
01104 ") transfer plugins with label \"" << label << "\", will unregister all of them";
01105 TLOG_WARNING(name_) << errmsg.str() << TLOG_ENDL;
01106 dispatcher_transfers_.erase(r_i_end, dispatcher_transfers_.end());
01107 return errmsg.str();
01108 }
01109 }
01110 catch (...)
01111 {
01112 std::stringstream errmsg;
01113 errmsg << "Unable to unregister transfer plugin with label \"" << label << "\"";
01114 return errmsg.str();
01115 }
01116
01117 return "Success";
01118 }
01119
01120
01121 size_t artdaq::AggregatorCore::getLatestFileSize_() const
01122 {
01123 if (disk_writing_directory_.size() == 0)
01124 {
01125 TLOG_DEBUG(name_) << "Latest file size = 0 (no directory)" << TLOG_ENDL;
01126 return 0;
01127 }
01128 BFS::path outputDir(disk_writing_directory_);
01129 BFS::directory_iterator endIter;
01130
01131 std::time_t latestFileTime = 0;
01132 size_t latestFileSize = 0;
01133 if (BFS::exists(outputDir) && BFS::is_directory(outputDir))
01134 {
01135 for (BFS::directory_iterator dirIter(outputDir); dirIter != endIter; ++dirIter)
01136 {
01137 BFS::path pathObj = dirIter->path();
01138 if (pathObj.filename().string().find("RootOutput") != std::string::npos &&
01139 pathObj.filename().string().find("root") != std::string::npos)
01140 {
01141 if (BFS::last_write_time(pathObj) >= latestFileTime)
01142 {
01143 latestFileTime = BFS::last_write_time(pathObj);
01144 latestFileSize = BFS::file_size(pathObj);
01145 }
01146 }
01147 }
01148 }
01149 time_t now = time(0);
01150 if ((now - latestFileTime) < 60)
01151 {
01152 TLOG_DEBUG(name_) << "Latest file size = "
01153 << latestFileSize << TLOG_ENDL;
01154 return latestFileSize;
01155 }
01156 else
01157 {
01158 TLOG_DEBUG(name_) << "Latest file size = 0 (too old)" << TLOG_ENDL;
01159 return 0;
01160 }
01161 }
01162
01163 bool artdaq::AggregatorCore::sendPauseAndResume_()
01164 {
01165 xmlrpc_c::clientSimple myClient;
01166 TLOG_INFO(name_) << "Starting automatic pause..." << TLOG_ENDL;
01167 for (size_t igrp = 0; igrp < xmlrpc_client_lists_.size(); ++igrp)
01168 {
01169 for (size_t idx = 0; idx < xmlrpc_client_lists_[igrp].size(); ++idx)
01170 {
01171 for (size_t iAttempt = 0; iAttempt < 5; ++iAttempt)
01172 {
01173 xmlrpc_c::value result;
01174 myClient.call((xmlrpc_client_lists_[igrp])[idx], "daq.pause", &result);
01175 std::string const resultString = xmlrpc_c::value_string(result);
01176 TLOG_DEBUG(name_) << "Pause: "
01177 << (xmlrpc_client_lists_[igrp])[idx]
01178 << " " << resultString << TLOG_ENDL;
01179 if (std::string::npos !=
01180 boost::algorithm::to_lower_copy(resultString).find("success"))
01181 {
01182 break;
01183 }
01184 else
01185 {
01186 sleep(2);
01187 TLOG_WARNING(name_) << "Retrying pause command to "
01188 << (xmlrpc_client_lists_[igrp])[idx]
01189 << " (" << resultString << ")" << TLOG_ENDL;
01190 }
01191 }
01192 }
01193 }
01194 TLOG_INFO(name_) << "Starting automatic resume..." << TLOG_ENDL;
01195 for (int igrp = (xmlrpc_client_lists_.size() - 1); igrp >= 0; --igrp)
01196 {
01197 for (size_t idx = 0; idx < xmlrpc_client_lists_[igrp].size(); ++idx)
01198 {
01199 for (size_t iAttempt = 0; iAttempt < 5; ++iAttempt)
01200 {
01201 xmlrpc_c::value result;
01202 myClient.call((xmlrpc_client_lists_[igrp])[idx], "daq.resume", &result);
01203 std::string const resultString = xmlrpc_c::value_string(result);
01204 TLOG_DEBUG(name_) << "Resume: "
01205 << (xmlrpc_client_lists_[igrp])[idx]
01206 << " " << resultString << TLOG_ENDL;
01207 if (std::string::npos !=
01208 boost::algorithm::to_lower_copy(resultString).find("success"))
01209 {
01210 break;
01211 }
01212 else
01213 {
01214 sleep(2);
01215 TLOG_WARNING(name_) << "Retrying resume command to "
01216 << (xmlrpc_client_lists_[igrp])[idx]
01217 << " (" << resultString << ")" << TLOG_ENDL;
01218 }
01219 }
01220 }
01221 }
01222 TLOG_INFO(name_) << "Done with automatic resume..." << TLOG_ENDL;
01223 system_pause_requested_.store(false);
01224 return true;
01225 }
01226
01227 void artdaq::AggregatorCore::logMessage_(std::string const& text)
01228 {
01229 if (is_data_logger_)
01230 {
01231 TLOG_INFO(name_) << text << TLOG_ENDL;
01232 }
01233 else
01234 {
01235 TLOG_DEBUG(name_) << text << TLOG_ENDL;
01236 }
01237 }
01238
01239 std::string artdaq::AggregatorCore::buildStatisticsString_()
01240 {
01241 std::ostringstream oss;
01242 double eventCount = 1.0;
01243 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
01244 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
01245 if (mqPtr.get() != 0)
01246 {
01247
01248 artdaq::MonitoredQuantityStats stats;
01249 mqPtr->getStats(stats);
01250 oss << "Input statistics: "
01251 << stats.recentSampleCount << " events received at "
01252 << stats.recentSampleRate << " events/sec, data rate = "
01253 << (stats.recentValueRate * sizeof(artdaq::RawDataType)
01254 / 1024.0 / 1024.0) << " MB/sec, monitor window = "
01255 << stats.recentDuration << " sec, min::max event size = "
01256 << (stats.recentValueMin * sizeof(artdaq::RawDataType)
01257 / 1024.0 / 1024.0)
01258 << "::"
01259 << (stats.recentValueMax * sizeof(artdaq::RawDataType)
01260 / 1024.0 / 1024.0)
01261 << " MB" << std::endl;
01262 eventCount = std::max(double(stats.recentSampleCount), 1.0);
01263 oss << "Average times per event: ";
01264 if (stats.recentSampleRate > 0.0)
01265 {
01266 oss << " elapsed time = "
01267 << (1.0 / stats.recentSampleRate) << " sec";
01268 }
01269 }
01270
01271
01272
01273
01274
01275
01276
01277 mqPtr = artdaq::StatisticsCollection::getInstance().
01278 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
01279 if (mqPtr.get() != 0)
01280 {
01281 oss << ", input wait time = "
01282 << (mqPtr->getRecentValueSum() / eventCount) << " sec";
01283 }
01284
01285 mqPtr = artdaq::StatisticsCollection::getInstance().
01286 getMonitoredQuantity(STORE_EVENT_WAIT_STAT_KEY);
01287 if (mqPtr.get() != 0)
01288 {
01289 artdaq::MonitoredQuantityStats stats;
01290 mqPtr->getStats(stats);
01291 oss << ", avg::max event store wait time = "
01292 << (stats.recentValueSum / eventCount)
01293 << "::" << stats.recentValueMax
01294 << " sec";
01295 }
01296
01297 mqPtr = artdaq::StatisticsCollection::getInstance().
01298 getMonitoredQuantity(SHM_COPY_TIME_STAT_KEY);
01299 if (mqPtr.get() != 0)
01300 {
01301 oss << ", shared memory copy time = "
01302 << (mqPtr->getRecentValueSum() / eventCount) << " sec";
01303 }
01304
01305 mqPtr = artdaq::StatisticsCollection::getInstance().
01306 getMonitoredQuantity(FILE_CHECK_TIME_STAT_KEY);
01307 if (mqPtr.get() != 0)
01308 {
01309 oss << ", file size test time = "
01310 << (mqPtr->getRecentValueSum() / eventCount) << " sec";
01311 }
01312
01313 return oss.str();
01314 }
01315
01316 void artdaq::AggregatorCore::sendMetrics_()
01317 {
01318
01319 double eventCount = 1.0;
01320 artdaq::MonitoredQuantityPtr mqPtr = artdaq::StatisticsCollection::getInstance().
01321 getMonitoredQuantity(INPUT_EVENTS_STAT_KEY);
01322 if (mqPtr.get() != 0)
01323 {
01324 artdaq::MonitoredQuantityStats stats;
01325 mqPtr->getStats(stats);
01326 eventCount = std::max(double(stats.recentSampleCount), 1.0);
01327 metricMan_.sendMetric("Event Rate",
01328 stats.recentSampleRate, "events/sec", 1);
01329 metricMan_.sendMetric("Average Event Size",
01330 (stats.recentValueAverage * sizeof(artdaq::RawDataType)
01331 ), "bytes/event", 2);
01332 metricMan_.sendMetric("Data Rate",
01333 (stats.recentValueRate * sizeof(artdaq::RawDataType)
01334 ), "bytes/sec", 2);
01335 }
01336
01337
01338
01339
01340
01341
01342
01343 mqPtr = artdaq::StatisticsCollection::getInstance().
01344 getMonitoredQuantity(INPUT_WAIT_STAT_KEY);
01345 if (mqPtr.get() != 0)
01346 {
01347 metricMan_.sendMetric("Average Input Wait Time",
01348 (mqPtr->getRecentValueSum() / eventCount),
01349 "seconds/event", 3);
01350 }
01351
01352 mqPtr = artdaq::StatisticsCollection::getInstance().
01353 getMonitoredQuantity(STORE_EVENT_WAIT_STAT_KEY);
01354 if (mqPtr.get() != 0)
01355 {
01356 metricMan_.sendMetric("Avg art Queue Wait Time",
01357 (mqPtr->getRecentValueSum() / eventCount),
01358 "seconds/event", 3);
01359 }
01360
01361 mqPtr = artdaq::StatisticsCollection::getInstance().
01362 getMonitoredQuantity(SHM_COPY_TIME_STAT_KEY);
01363 if (mqPtr.get() != 0)
01364 {
01365 metricMan_.sendMetric("Avg Shared Memory Copy Time",
01366 (mqPtr->getRecentValueSum() / eventCount),
01367 "seconds/event", 4);
01368 }
01369
01370 mqPtr = artdaq::StatisticsCollection::getInstance().
01371 getMonitoredQuantity(FILE_CHECK_TIME_STAT_KEY);
01372 if (mqPtr.get() != 0)
01373 {
01374 metricMan_.sendMetric("Average File Check Time",
01375 (mqPtr->getRecentValueSum() / eventCount),
01376 "seconds/event", 4);
01377 }
01378 }