00001 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00002
00003 #include <boost/exception/all.hpp>
00004 #include <boost/throw_exception.hpp>
00005
00006 #include <limits>
00007 #include <iterator>
00008
00009 #include "canvas/Utilities/Exception.h"
00010 #include "cetlib_except/exception.h"
00011 #include "fhiclcpp/ParameterSet.h"
00012 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00013 #include "artdaq-core/Data/Fragment.hh"
00014 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00015 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00016 #include "artdaq-core/Utilities/TimeUtils.hh"
00017
00018 #include <fstream>
00019 #include <iomanip>
00020 #include <iterator>
00021 #include <iostream>
00022 #include <iomanip>
00023 #include <sys/poll.h>
00024 #include "artdaq/DAQdata/TCPConnect.hh"
00025
00026 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00027 : mutex_()
00028 , request_port_(3001)
00029 , request_addr_("227.128.12.26")
00030 , requests_()
00031 , request_stop_requested_(false)
00032 , request_received_(false)
00033 , end_of_run_timeout_ms_(1000)
00034 , windowOffset_(0)
00035 , windowWidth_(0)
00036 , staleTimeout_(Fragment::InvalidTimestamp)
00037 , expectedType_(Fragment::EmptyFragmentType)
00038 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00039 , uniqueWindows_(true)
00040 , last_window_send_time_()
00041 , missing_request_window_timeout_us_(1000000)
00042 , window_close_timeout_us_(2000000)
00043 , useDataThread_(false)
00044 , sleep_on_no_data_us_(0)
00045 , data_thread_running_(false)
00046 , dataBufferDepthFragments_(0)
00047 , dataBufferDepthBytes_(0)
00048 , maxDataBufferDepthFragments_(1000)
00049 , maxDataBufferDepthBytes_(1000)
00050 , useMonitoringThread_(false)
00051 , monitoringInterval_(0)
00052 , lastMonitoringCall_()
00053 , isHardwareOK_(true)
00054 , dataBuffer_()
00055 , newDataBuffer_()
00056 , run_number_(-1)
00057 , subrun_number_(-1)
00058 , timeout_(std::numeric_limits<uint64_t>::max())
00059 , timestamp_(std::numeric_limits<uint64_t>::max())
00060 , should_stop_(false)
00061 , exception_(false)
00062 , force_stop_(false)
00063 , latest_exception_report_("none")
00064 , ev_counter_(1)
00065 , board_id_(-1)
00066 , instance_name_for_metrics_("FragmentGenerator")
00067 , sleep_on_stop_us_(0)
00068 {}
00069
00070 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00071 : mutex_()
00072 , request_port_(ps.get<int>("request_port", 3001))
00073 , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
00074 , requests_()
00075 , request_stop_requested_(false)
00076 , request_received_(false)
00077 , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
00078 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00079 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00080 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00081 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00082 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00083 , last_window_send_time_(std::chrono::steady_clock::now())
00084 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
00085 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00086 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00087 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00088 , data_thread_running_(false)
00089 , dataBufferDepthFragments_(0)
00090 , dataBufferDepthBytes_(0)
00091 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00092 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00093 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00094 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00095 , lastMonitoringCall_()
00096 , isHardwareOK_(true)
00097 , dataBuffer_()
00098 , newDataBuffer_()
00099 , run_number_(-1)
00100 , subrun_number_(-1)
00101 , timeout_(std::numeric_limits<uint64_t>::max())
00102 , timestamp_(std::numeric_limits<uint64_t>::max())
00103 , should_stop_(false)
00104 , exception_(false)
00105 , force_stop_(false)
00106 , latest_exception_report_("none")
00107 , ev_counter_(1)
00108 , board_id_(-1)
00109 , sleep_on_stop_us_(0)
00110 {
00111 board_id_ = ps.get<int>("board_id");
00112 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00113
00114 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00115
00116 TLOG_TRACE("CommandableFragmentGenerator") << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" << TLOG_ENDL;
00117 int fragment_id = ps.get<int>("fragment_id", -99);
00118
00119 if (fragment_id != -99)
00120 {
00121 if (fragment_ids_.size() != 0)
00122 {
00123 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00124 throw cet::exception(latest_exception_report_);
00125 }
00126 else
00127 {
00128 fragment_ids_.emplace_back(fragment_id);
00129 }
00130 }
00131
00132 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00133
00134 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00135 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00136
00137 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00138 if (modeString == "single" || modeString == "Single")
00139 {
00140 mode_ = RequestMode::Single;
00141 }
00142 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00143 {
00144 mode_ = RequestMode::Buffer;
00145 }
00146 else if (modeString == "window" || modeString == "Window")
00147 {
00148 mode_ = RequestMode::Window;
00149 }
00150 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00151 {
00152 mode_ = RequestMode::Ignored;
00153 }
00154 TLOG_DEBUG("CommandableFragmentGenerator") << "Request mode is " << printMode_() << TLOG_ENDL;
00155
00156 if (mode_ != RequestMode::Ignored)
00157 {
00158 if (!useDataThread_)
00159 {
00160 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00161 throw cet::exception(latest_exception_report_);
00162 }
00163 setupRequestListener();
00164 }
00165 }
00166
00167 void artdaq::CommandableFragmentGenerator::setupRequestListener()
00168 {
00169 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
00170 if (!request_socket_)
00171 {
00172 throw art::Exception(art::errors::Configuration) << "CommandableFragmentGenerator: Error creating socket for receiving data requests!" << std::endl;
00173 exit(1);
00174 }
00175
00176 struct sockaddr_in si_me_request;
00177
00178 int yes = 1;
00179 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
00180 {
00181 throw art::Exception(art::errors::Configuration) <<
00182 "RequestedFragmentGenrator: Unable to enable port reuse on request socket" << std::endl;
00183 exit(1);
00184 }
00185 memset(&si_me_request, 0, sizeof(si_me_request));
00186 si_me_request.sin_family = AF_INET;
00187 si_me_request.sin_port = htons(request_port_);
00188 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
00189 if (bind(request_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
00190 {
00191 throw art::Exception(art::errors::Configuration) <<
00192 "CommandableFragmentGenerator: Cannot bind request socket to port " << request_port_ << std::endl;
00193 exit(1);
00194 }
00195
00196 if (request_addr_ != "localhost")
00197 {
00198 struct ip_mreq mreq;
00199 int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
00200 if (sts == -1)
00201 {
00202 throw art::Exception(art::errors::Configuration) << "Unable to resolve multicast request address" << std::endl;
00203 exit(1);
00204 }
00205 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
00206 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
00207 {
00208 throw art::Exception(art::errors::Configuration) <<
00209 "CommandableFragmentGenerator: Unable to join multicast group" << std::endl;
00210 exit(1);
00211 }
00212 }
00213 }
00214
00215 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00216 {
00217 force_stop_ = true;
00218 should_stop_ = true;
00219 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining dataThread" << TLOG_ENDL;
00220 if (dataThread_.joinable()) dataThread_.join();
00221 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining monitoringThread" << TLOG_ENDL;
00222 if (monitoringThread_.joinable()) monitoringThread_.join();
00223 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining requestThread" << TLOG_ENDL;
00224 if (requestThread_.joinable()) requestThread_.join();
00225 if (request_socket_ != -1) close(request_socket_);
00226 }
00227
00228 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00229 {
00230 bool result = true;
00231
00232 if (check_stop()) usleep(sleep_on_stop_us_);
00233 if (exception() || force_stop_) return false;
00234
00235 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00236 {
00237 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Checking whether to collect Monitoring Data" << TLOG_ENDL;
00238 auto now = std::chrono::steady_clock::now();
00239 if (std::chrono::duration_cast<std::chrono::microseconds>(now - lastMonitoringCall_).count() >= monitoringInterval_)
00240 {
00241 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Collecting Monitoring Data" << TLOG_ENDL;
00242 isHardwareOK_ = checkHWStatus_();
00243 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
00244 lastMonitoringCall_ = now;
00245 }
00246 }
00247
00248 try
00249 {
00250 std::lock_guard<std::mutex> lk(mutex_);
00251 if (useDataThread_)
00252 {
00253 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling applyRequests" << TLOG_ENDL;
00254 result = applyRequests(output);
00255 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with applyRequests" << TLOG_ENDL;
00256
00257 if (exception())
00258 {
00259 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00260 }
00261 }
00262 else
00263 {
00264 if (!isHardwareOK_)
00265 {
00266 TLOG_ERROR("CommandableFragmentGenerator") << "Stopping CFG because the hardware reports bad status!" << TLOG_ENDL;
00267 return false;
00268 }
00269 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
00270 try
00271 {
00272 result = getNext_(output);
00273 }
00274 catch (...)
00275 {
00276 throw;
00277 }
00278 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
00279 }
00280 }
00281 catch (const cet::exception& e)
00282 {
00283 latest_exception_report_ = "cet::exception caught in getNext(): ";
00284 latest_exception_report_.append(e.what());
00285 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: cet::exception caught: " << e << TLOG_ENDL;
00286 set_exception(true);
00287 return false;
00288 }
00289 catch (const boost::exception& e)
00290 {
00291 latest_exception_report_ = "boost::exception caught in getNext(): ";
00292 latest_exception_report_.append(boost::diagnostic_information(e));
00293 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: boost::exception caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
00294 set_exception(true);
00295 return false;
00296 }
00297 catch (const std::exception& e)
00298 {
00299 latest_exception_report_ = "std::exception caught in getNext(): ";
00300 latest_exception_report_.append(e.what());
00301 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: std::exception caught: " << e.what() << TLOG_ENDL;
00302 set_exception(true);
00303 return false;
00304 }
00305 catch (...)
00306 {
00307 latest_exception_report_ = "Unknown exception caught in getNext().";
00308 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: unknown exception caught" << TLOG_ENDL;
00309 set_exception(true);
00310 return false;
00311 }
00312
00313 if (!result)
00314 {
00315 TLOG_DEBUG("getNext") << "stopped " << TLOG_ENDL;
00316 }
00317
00318 return result;
00319 }
00320
00321 bool artdaq::CommandableFragmentGenerator::check_stop()
00322 {
00323 TLOG_ARB(14, "CommandableFragmentGeneraotr") << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", requests_.size()=" << std::to_string(requests_.size()) << ", exception status =" << int(exception()) << TLOG_ENDL;
00324
00325 if (!should_stop()) return false;
00326 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00327 if (force_stop_) return true;
00328
00329 if (!request_received_)
00330 {
00331 TLOG_ERROR("CommandableFragmentGenerator") << "Stop request received by request-based CommandableFragmentGenerator, but no requests have been received." << std::endl
00332 << "Check that UDP port " << request_port_ << " is open in the firewall config." << TLOG_ENDL;
00333 return true;
00334 }
00335
00336 if (!request_stop_requested_) return false;
00337
00338 auto dur = std::chrono::steady_clock::now() - request_stop_timeout_;
00339 return std::chrono::duration_cast<std::chrono::milliseconds>(dur).count() > static_cast<int>(end_of_run_timeout_ms_);
00340 }
00341
00342 int artdaq::CommandableFragmentGenerator::fragment_id() const
00343 {
00344 if (fragment_ids_.size() != 1)
00345 {
00346 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00347 }
00348 else
00349 {
00350 return fragment_ids_[0];
00351 }
00352 }
00353
00354 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00355 {
00356 if (force || mode_ == RequestMode::Ignored)
00357 {
00358 return ev_counter_.fetch_add(step);
00359 }
00360 return ev_counter_.load();
00361 }
00362
00363 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00364 {
00365 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00366
00367 timeout_ = timeout;
00368 timestamp_ = timestamp;
00369 ev_counter_.store(1);
00370 should_stop_.store(false);
00371 exception_.store(false);
00372 run_number_ = run;
00373 subrun_number_ = 1;
00374 latest_exception_report_ = "none";
00375 dataBuffer_.clear();
00376 requests_.clear();
00377
00378 start();
00379
00380 std::unique_lock<std::mutex> lk(mutex_);
00381 if (useDataThread_) startDataThread();
00382 if (useMonitoringThread_) startMonitoringThread();
00383 if (mode_ != RequestMode::Ignored) startRequestReceiverThread();
00384 }
00385
00386 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00387 {
00388 TLOG_DEBUG("CommandableFragmentGenerator") << "Stop Command received." << TLOG_ENDL;
00389
00390 timeout_ = timeout;
00391 timestamp_ = timestamp;
00392
00393 stopNoMutex();
00394 should_stop_.store(true);
00395 std::unique_lock<std::mutex> lk(mutex_);
00396 stop();
00397 }
00398
00399 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00400 {
00401 timeout_ = timeout;
00402 timestamp_ = timestamp;
00403
00404 pauseNoMutex();
00405 should_stop_.store(true);
00406 std::unique_lock<std::mutex> lk(mutex_);
00407
00408 pause();
00409 }
00410
00411 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00412 {
00413 timeout_ = timeout;
00414 timestamp_ = timestamp;
00415
00416 subrun_number_ += 1;
00417 should_stop_ = false;
00418
00419 dataBuffer_.clear();
00420 requests_.clear();
00421
00422
00423 resume();
00424
00425 std::unique_lock<std::mutex> lk(mutex_);
00426 if (useDataThread_) startDataThread();
00427 if (useMonitoringThread_) startMonitoringThread();
00428 if (mode_ != RequestMode::Ignored) startRequestReceiverThread();
00429 }
00430
00431 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00432 {
00433 std::lock_guard<std::mutex> lk(mutex_);
00434
00435
00436
00437
00438
00439
00440 std::string childReport = reportSpecific(which);
00441 if (childReport.length() > 0) { return childReport; }
00442
00443
00444 if (which == "latest_exception")
00445 {
00446 return latest_exception_report_;
00447 }
00448
00449
00450 childReport = report();
00451 if (childReport.length() > 0) { return childReport; }
00452
00453
00454 std::string tmpString = "The \"" + which + "\" command is not ";
00455 tmpString.append("currently supported by the ");
00456 tmpString.append(metricsReportingInstanceName());
00457 tmpString.append(" fragment generator.");
00458 return tmpString;
00459 }
00460
00461
00462 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00463 {
00464 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00465 }
00466
00467 void artdaq::CommandableFragmentGenerator::pause()
00468 {
00469 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00470 }
00471
00472 void artdaq::CommandableFragmentGenerator::resume()
00473 {
00474 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00475 }
00476
00477 std::string artdaq::CommandableFragmentGenerator::report()
00478 {
00479 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00480 return "";
00481 }
00482
00483 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00484 {
00485 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00486 return "";
00487 }
00488
00489 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00490 {
00491 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00492 return true;
00493 }
00494
00495 void artdaq::CommandableFragmentGenerator::startDataThread()
00496 {
00497 if (dataThread_.joinable()) dataThread_.join();
00498 TLOG_INFO("CommandableFragmentGenerator") << "Starting Data Receiver Thread" << TLOG_ENDL;
00499 dataThread_ = std::thread(&CommandableFragmentGenerator::getDataLoop, this);
00500 }
00501
00502 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00503 {
00504 if (monitoringThread_.joinable()) monitoringThread_.join();
00505 TLOG_INFO("CommandableFragmentGenerator") << "Starting Hardware Monitoring Thread" << TLOG_ENDL;
00506 monitoringThread_ = std::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00507 }
00508
00509 void artdaq::CommandableFragmentGenerator::startRequestReceiverThread()
00510 {
00511 if (requestThread_.joinable()) requestThread_.join();
00512 TLOG_INFO("CommandableFragmentGenerator") << "Starting Request Reception Thread" << TLOG_ENDL;
00513 requestThread_ = std::thread(&CommandableFragmentGenerator::receiveRequestsLoop, this);
00514 }
00515
00516 std::string artdaq::CommandableFragmentGenerator::printMode_()
00517 {
00518 switch (mode_)
00519 {
00520 case RequestMode::Single:
00521 return "Single";
00522 case RequestMode::Buffer:
00523 return "Buffer";
00524 case RequestMode::Window:
00525 return "Window";
00526 case RequestMode::Ignored:
00527 return "Ignored";
00528 }
00529
00530 return "ERROR";
00531 }
00532
00533 void artdaq::CommandableFragmentGenerator::getDataLoop()
00534 {
00535 data_thread_running_ = true;
00536 while (!force_stop_)
00537 {
00538 if (!isHardwareOK_)
00539 {
00540 TLOG_DEBUG("CommandableFragmentGenerator") << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" << TLOG_ENDL;
00541 data_thread_running_ = false;
00542 return;
00543 }
00544
00545 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: calling getNext_" << TLOG_ENDL;
00546
00547 bool data = false;
00548 auto startdata = std::chrono::steady_clock::now();
00549
00550 try
00551 {
00552 data = getNext_(newDataBuffer_);
00553 }
00554 catch (...)
00555 {
00556 ExceptionHandler(ExceptionHandlerRethrow::no,
00557 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00558 set_exception(true);
00559
00560 data_thread_running_ = false;
00561 return;
00562 }
00563
00564 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: checking buffer size" << TLOG_ENDL;
00565 auto startwait = std::chrono::steady_clock::now();
00566
00567 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00568 {
00569 usleep(sleep_on_no_data_us_);
00570 }
00571 if (metricMan)
00572 {
00573 metricMan->sendMetric("Avg Data Acquisition Time", std::chrono::duration_cast<artdaq::TimeUtils::seconds>(startwait - startdata).count(), "s", 3, artdaq::MetricMode::Average);
00574 }
00575
00576 auto first = true;
00577 auto lastwaittime = 0;
00578 while (dataBufferIsTooLarge())
00579 {
00580 if (should_stop())
00581 {
00582 TLOG_DEBUG("CommandableFragmentGenerator") << "Run ended while waiting for buffer to shrink!" << TLOG_ENDL;
00583 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00584 getDataBufferStats();
00585 dataCondition_.notify_all();
00586 data_thread_running_ = false;
00587 return;
00588 }
00589 auto waittime = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - startwait).count();
00590
00591 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00592 {
00593 TLOG_WARNING("CommandableFragmentGenerator") << "Bad Omen: Data Buffer has exceeded its size limits. Check the connection between the BoardReader and the EventBuilders! (seq_id=" << ev_counter() << ")" << TLOG_ENDL;
00594 first = false;
00595 }
00596 if (waittime % 5 && waittime != lastwaittime)
00597 {
00598 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" << TLOG_ENDL;
00599 }
00600 lastwaittime = waittime;
00601 usleep(1000);
00602 }
00603
00604 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: processing data" << TLOG_ENDL;
00605 if (data && !force_stop_)
00606 {
00607 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00608 switch (mode_)
00609 {
00610 case RequestMode::Single:
00611
00612 while (newDataBuffer_.size() >= fragment_ids_.size())
00613 {
00614 dataBuffer_.clear();
00615 auto it = newDataBuffer_.begin();
00616 std::advance(it, fragment_ids_.size());
00617 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00618 }
00619 break;
00620 case RequestMode::Buffer:
00621 case RequestMode::Ignored:
00622 case RequestMode::Window:
00623 default:
00624
00625 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00626 break;
00627 }
00628 getDataBufferStats();
00629 }
00630
00631 {
00632 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00633 if (dataBuffer_.size() > 0)
00634 {
00635 dataCondition_.notify_all();
00636 }
00637 }
00638 if (!data || force_stop_)
00639 {
00640 TLOG_INFO("CommandableFragmentGenerator") << "Data flow has stopped. Ending data collection thread" << TLOG_ENDL;
00641 data_thread_running_ = false;
00642 return;
00643 }
00644 }
00645 }
00646
00647 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00648 {
00649 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00650 }
00651
00652 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00653 {
00655 dataBufferDepthFragments_ = dataBuffer_.size();
00656 size_t acc = 0;
00657 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Calculating buffer size" << TLOG_ENDL;
00658 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00659 {
00660 if (i->get() != nullptr)
00661 {
00662 acc += (*i)->sizeBytes();
00663 }
00664 }
00665 dataBufferDepthBytes_ = acc;
00666
00667 if (metricMan)
00668 {
00669 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Sending Metrics" << TLOG_ENDL;
00670 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00671 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00672 }
00673 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00674 << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) << TLOG_ENDL;
00675 }
00676
00677 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00678 {
00679 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00680 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00681 if (dataBufferDepthFragments_ > 0)
00682 {
00683 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00684 {
00685
00686 while (dataBufferIsTooLarge())
00687 {
00688 dataBuffer_.erase(dataBuffer_.begin());
00689 getDataBufferStats();
00690 }
00691 if (dataBuffer_.size() > 0)
00692 {
00693 TLOG_ARB(17, "CommandableFragmentGenerator") << "Determining if Fragments can be dropped from data buffer" << TLOG_ENDL;
00694 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00695 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00696 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00697 {
00698 if ((*it)->timestamp() < min)
00699 {
00700 it = dataBuffer_.erase(it);
00701 }
00702 else
00703 {
00704 ++it;
00705 }
00706 }
00707 getDataBufferStats();
00708 }
00709 }
00710 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00711 {
00712
00713 while (dataBuffer_.size() > fragment_ids_.size())
00714 {
00715 dataBuffer_.erase(dataBuffer_.begin());
00716 }
00717 }
00718 }
00719 }
00720
00721 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00722 {
00723 while (!force_stop_)
00724 {
00725 if (should_stop() || monitoringInterval_ <= 0)
00726 {
00727 TLOG_DEBUG("CommandableFragmentGenerator") << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00728 << " and monitoringInterval is " << monitoringInterval_ << ", returning" << TLOG_ENDL;
00729 return;
00730 }
00731 TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" << TLOG_ENDL;
00732
00733 auto now = std::chrono::steady_clock::now();
00734 if (std::chrono::duration_cast<std::chrono::microseconds>(now - lastMonitoringCall_).count() >= monitoringInterval_)
00735 {
00736 isHardwareOK_ = checkHWStatus_();
00737 TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
00738 lastMonitoringCall_ = now;
00739 }
00740 usleep(monitoringInterval_ / 10);
00741 }
00742 }
00743
00744 void artdaq::CommandableFragmentGenerator::receiveRequestsLoop()
00745 {
00746 while (!force_stop_)
00747 {
00748 if (check_stop() || !isHardwareOK_ || exception())
00749 {
00750 TLOG_DEBUG("CommandableFragmentGenerator") << "receiveRequestsLoop: check_stop is " << std::boolalpha << check_stop()
00751 << ", isHardwareOK_ is " << isHardwareOK_ << ", and exception state is " << exception() << ", aborting request reception thread." << TLOG_ENDL;
00752 return;
00753 }
00754
00755
00756 if (mode_ == RequestMode::Ignored) return;
00757 TLOG_ARB(16, "CommandableFragmentGenerator") << "receiveRequestsLoop: Polling Request socket for new requests" << TLOG_ENDL;
00758
00759 int ms_to_wait = 1000;
00760 struct pollfd ufds[1];
00761 ufds[0].fd = request_socket_;
00762 ufds[0].events = POLLIN | POLLPRI;
00763 int rv = poll(ufds, 1, ms_to_wait);
00764 if (rv > 0)
00765 {
00766 if (ufds[0].revents == POLLIN || ufds[0].revents == POLLPRI)
00767 {
00768 TLOG_ARB(11, "CommandableFragmentGenerator") << "Recieved packet on Request channel" << TLOG_ENDL;
00769 detail::RequestHeader hdr_buffer;
00770 recv(request_socket_, &hdr_buffer, sizeof(hdr_buffer), 0);
00771 TLOG_ARB(11, "CommandableFragmentGenerator") << "Request header word: 0x" << std::hex << hdr_buffer.header << TLOG_ENDL;
00772 if (hdr_buffer.isValid())
00773 {
00774 request_received_ = true;
00775 if (hdr_buffer.mode == detail::RequestMessageMode::EndOfRun)
00776 {
00777 TLOG_INFO("CommandableFragmentGenerator") << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests..." << TLOG_ENDL;
00778 request_stop_timeout_ = std::chrono::steady_clock::now();
00779 request_stop_requested_ = true;
00780 }
00781 std::vector<detail::RequestPacket> pkt_buffer(hdr_buffer.packet_count);
00782 recv(request_socket_, &pkt_buffer[0], sizeof(detail::RequestPacket) * hdr_buffer.packet_count, 0);
00783 bool anyNew = false;
00784 for (auto& buffer : pkt_buffer)
00785 {
00786 if (!buffer.isValid()) continue;
00787 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
00788 {
00789 TLOG_ERROR("CommandableFragmentGenerator") << "Received conflicting request for SeqID "
00790 << std::to_string(buffer.sequence_id) << "!"
00791 << " Old ts=" << std::to_string(requests_[buffer.sequence_id])
00792 << ", new ts=" << std::to_string(buffer.timestamp) << ". Keeping OLD!" << TLOG_ENDL;
00793 }
00794 else if (!requests_.count(buffer.sequence_id))
00795 {
00796 int delta = buffer.sequence_id - ev_counter();
00797 TLOG_ARB(11, "CommandableFragmentGenerator") << "Recieved request for sequence ID " << std::to_string(buffer.sequence_id)
00798 << " and timestamp " << std::to_string(buffer.timestamp) << " (delta: " << delta << ")" << TLOG_ENDL;
00799 if (delta < 0)
00800 {
00801 TLOG_ARB(11, "CommandableFragmentGenerator") << "Already serviced this request! Ignoring..." << TLOG_ENDL;
00802 }
00803 else
00804 {
00805 std::unique_lock<std::mutex> tlk(request_mutex_);
00806 requests_[buffer.sequence_id] = buffer.timestamp;
00807 anyNew = true;
00808 }
00809 }
00810 }
00811 if (anyNew)
00812 {
00813 std::unique_lock<std::mutex> lock(request_mutex_);
00814 requestCondition_.notify_all();
00815 }
00816 }
00817 }
00818 }
00819 }
00820 }
00821
00822 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
00823 {
00824 if (check_stop() || exception())
00825 {
00826 return false;
00827 }
00828
00829 if (mode_ == RequestMode::Ignored)
00830 {
00831 while (dataBufferDepthFragments_ <= 0)
00832 {
00833 if (check_stop() || exception() || !isHardwareOK_) return false;
00834 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00835 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
00836 }
00837 }
00838 else
00839 {
00840 if ((check_stop() && requests_.size() <= 0) || exception()) return false;
00841 checkDataBuffer();
00842
00843 while (requests_.size() <= 0)
00844 {
00845 if (check_stop() || exception()) return false;
00846
00847 checkDataBuffer();
00848
00849 std::unique_lock<std::mutex> lock(request_mutex_);
00850 requestCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return requests_.size() > 0; });
00851 }
00852 }
00853
00854 {
00855 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
00856 std::unique_lock<std::mutex> rlk(request_mutex_);
00857
00858
00859 if (mode_ == RequestMode::Ignored)
00860 {
00861
00862 TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Ignored; Copying data to output" << TLOG_ENDL;
00863 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00864 dataBuffer_.clear();
00865 }
00866 else if (mode_ == RequestMode::Single)
00867 {
00868
00869 sendEmptyFragments(frags);
00870
00871 if (dataBuffer_.size() > 0)
00872 {
00873 TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Single; Sending copy of last event" << TLOG_ENDL;
00874 for (auto& fragptr : dataBuffer_)
00875 {
00876
00877 auto frag = fragptr.get();
00878 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00879 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00880 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00881 newfrag->setTimestamp(requests_[ev_counter()]);
00882 newfrag->setSequenceID(ev_counter());
00883 frags.push_back(std::move(newfrag));
00884 }
00885 }
00886 else
00887 {
00888 sendEmptyFragment(frags, ev_counter(), "No data for");
00889 }
00890 requests_.clear();
00891 ev_counter_inc(1, true);
00892 }
00893 else if (mode_ == RequestMode::Buffer || mode_ == RequestMode::Window)
00894 {
00895 if (mode_ == RequestMode::Buffer || static_cast<size_t>(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count()) > missing_request_window_timeout_us_)
00896 {
00897 if (mode_ == RequestMode::Window)
00898 {
00899 TLOG_ERROR("CommandableFragmentGenerator") << "Data-taking has paused for " << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count() << " us "
00900 << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages." << " Sending Empty Fragments for missing requests!" << TLOG_ENDL;
00901 }
00902 sendEmptyFragments(frags);
00903 }
00904 for (auto req = requests_.begin(); req != requests_.end();)
00905 {
00906 auto ts = req->second;
00907 if (req->first < ev_counter())
00908 {
00909 req = requests_.erase(req);
00910 continue;
00911 }
00912 while (req->first > ev_counter() && request_stop_requested_ && std::chrono::duration_cast<std::chrono::seconds>(std::chrono::steady_clock::now() - request_stop_timeout_).count() > 1)
00913 {
00914 sendEmptyFragment(frags, ev_counter(), "Missing request for");
00915 ev_counter_inc(1, true);
00916 }
00917 if (req->first > ev_counter())
00918 {
00919 ++req;
00920 continue;
00921 }
00922 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) << " (Buffered mode will always succeed)" << TLOG_ENDL;
00923 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00924 Fragment::timestamp_t max = min + windowWidth_;
00925 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
00926 << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" << TLOG_ENDL;
00927 bool windowClosed = mode_ != RequestMode::Window || (dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max);
00928 bool windowTimeout = static_cast<size_t>(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count()) > window_close_timeout_us_;
00929 if (windowTimeout)
00930 {
00931 TLOG_WARNING("CommandableFragmentGenerator") << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max) << ", buffer=" << std::to_string(dataBuffer_.back()->timestamp()) << "). Time waiting: "
00932 << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count() << " us "
00933 << "(> " << std::to_string(window_close_timeout_us_) << " us)." << TLOG_ENDL;
00934 }
00935 if (windowClosed || !data_thread_running_ || windowTimeout)
00936 {
00937 TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered or Window-requested Fragments" << TLOG_ENDL;
00938 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00939 frags.back()->setTimestamp(ts);
00940 ContainerFragmentLoader cfl(*frags.back());
00941
00942 if (mode_ == RequestMode::Window && !windowClosed) cfl.set_missing_data(true);
00943 if (mode_ == RequestMode::Window && dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
00944 {
00945 TLOG_DEBUG("CommandableFragmentGenerator") << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" << TLOG_ENDL;
00946 cfl.set_missing_data(true);
00947 }
00948
00949
00950
00951 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00952 {
00953 if (mode_ == RequestMode::Window)
00954 {
00955 Fragment::timestamp_t fragT = (*it)->timestamp();
00956 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00957 {
00958 ++it;
00959 continue;
00960 }
00961 }
00962
00963 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
00964 cfl.addFragment(*it);
00965
00966 if (mode_ == RequestMode::Buffer || (mode_ == RequestMode::Window && uniqueWindows_))
00967 {
00968 it = dataBuffer_.erase(it);
00969 }
00970 else
00971 {
00972 ++it;
00973 }
00974 }
00975 req = requests_.erase(req);
00976 ev_counter_inc(1, true);
00977 last_window_send_time_ = std::chrono::steady_clock::now();
00978 }
00979 else
00980 {
00981
00982 break;
00983 }
00984 }
00985 }
00986 getDataBufferStats();
00987 }
00988
00989 if (frags.size() > 0)
00990 TLOG_ARB(9, "CommandableFragmentGenerator") << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." << TLOG_ENDL;
00991 return true;
00992 }
00993
00994 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
00995 {
00996 TLOG_WARNING("CommandableFragmentGenerator") << desc << " request " << seqId << ", sending empty fragment" << TLOG_ENDL;
00997 for (auto fid : fragment_ids_)
00998 {
00999 auto frag = new Fragment();
01000 frag->setSequenceID(seqId);
01001 frag->setFragmentID(fid);
01002 frag->setSystemType(Fragment::EmptyFragmentType);
01003 frags.emplace_back(FragmentPtr(frag));
01004 }
01005 return true;
01006 }
01007
01008 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags)
01009 {
01010 auto sequence_id = Fragment::InvalidSequenceID;
01011 auto timestamp = Fragment::InvalidTimestamp;
01012
01013 TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments" << TLOG_ENDL;
01014 for (auto it = requests_.begin(); it != requests_.end();)
01015 {
01016 auto seq = it->first;
01017 auto ts = it->second;
01018
01019 while (seq > ev_counter())
01020 {
01021
01022 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01023 ev_counter_inc(1, true);
01024 }
01025
01026
01027 if (++it == requests_.end())
01028 {
01029 sequence_id = seq;
01030 timestamp = ts;
01031 break;
01032 }
01033 if (seq < ev_counter()) continue;
01034
01035 }
01036 requests_.clear();
01037
01038 if (sequence_id < ev_counter()) return;
01039 requests_[sequence_id] = timestamp;
01040 }