00001 #define TRACE_NAME "CommandableFragmentGenerator"
00002 #include "tracemf.h"
00003
00004 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00005
00006 #include <boost/exception/all.hpp>
00007 #include <boost/throw_exception.hpp>
00008
00009 #include <limits>
00010 #include <iterator>
00011
00012 #include "canvas/Utilities/Exception.h"
00013 #include "cetlib_except/exception.h"
00014 #include "fhiclcpp/ParameterSet.h"
00015
00016 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00017 #include "artdaq-core/Data/Fragment.hh"
00018 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00019 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00020 #include "artdaq-core/Utilities/TimeUtils.hh"
00021
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <iterator>
00025 #include <iostream>
00026 #include <iomanip>
00027 #include <algorithm>
00028 #include <sys/poll.h>
00029 #include "artdaq/DAQdata/TCPConnect.hh"
00030
00031 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00032 : mutex_()
00033 , requestReceiver_(new RequestReceiver())
00034 , windowOffset_(0)
00035 , windowWidth_(0)
00036 , staleTimeout_(Fragment::InvalidTimestamp)
00037 , expectedType_(Fragment::EmptyFragmentType)
00038 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00039 , uniqueWindows_(true)
00040 , missing_request_(true)
00041 , missing_request_time_()
00042 , last_window_send_time_()
00043 , last_window_send_time_set_(false)
00044 , windows_sent_ooo_()
00045 , missing_request_window_timeout_us_(1000000)
00046 , window_close_timeout_us_(2000000)
00047 , useDataThread_(false)
00048 , sleep_on_no_data_us_(0)
00049 , data_thread_running_(false)
00050 , dataBufferDepthFragments_(0)
00051 , dataBufferDepthBytes_(0)
00052 , maxDataBufferDepthFragments_(1000)
00053 , maxDataBufferDepthBytes_(1000)
00054 , useMonitoringThread_(false)
00055 , monitoringInterval_(0)
00056 , lastMonitoringCall_()
00057 , isHardwareOK_(true)
00058 , dataBuffer_()
00059 , newDataBuffer_()
00060 , run_number_(-1)
00061 , subrun_number_(-1)
00062 , timeout_(std::numeric_limits<uint64_t>::max())
00063 , timestamp_(std::numeric_limits<uint64_t>::max())
00064 , should_stop_(false)
00065 , exception_(false)
00066 , force_stop_(false)
00067 , latest_exception_report_("none")
00068 , ev_counter_(1)
00069 , board_id_(-1)
00070 , instance_name_for_metrics_("FragmentGenerator")
00071 , sleep_on_stop_us_(0)
00072 {}
00073
00074 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00075 : mutex_()
00076 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00077 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00078 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00079 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00080 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00081 , missing_request_(false)
00082 , missing_request_time_(decltype(missing_request_time_)::max())
00083 , last_window_send_time_(decltype(last_window_send_time_)::max())
00084 , last_window_send_time_set_(false)
00085 , windows_sent_ooo_()
00086 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
00087 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00088 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00089 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00090 , data_thread_running_(false)
00091 , dataBufferDepthFragments_(0)
00092 , dataBufferDepthBytes_(0)
00093 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00094 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00095 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00096 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00097 , lastMonitoringCall_()
00098 , isHardwareOK_(true)
00099 , dataBuffer_()
00100 , newDataBuffer_()
00101 , run_number_(-1)
00102 , subrun_number_(-1)
00103 , timeout_(std::numeric_limits<uint64_t>::max())
00104 , timestamp_(std::numeric_limits<uint64_t>::max())
00105 , should_stop_(false)
00106 , exception_(false)
00107 , force_stop_(false)
00108 , latest_exception_report_("none")
00109 , ev_counter_(1)
00110 , board_id_(-1)
00111 , sleep_on_stop_us_(0)
00112 {
00113 board_id_ = ps.get<int>("board_id");
00114 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00115
00116 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00117
00118 TLOG_TRACE("CommandableFragmentGenerator") << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" << TLOG_ENDL;
00119 int fragment_id = ps.get<int>("fragment_id", -99);
00120
00121 if (fragment_id != -99)
00122 {
00123 if (fragment_ids_.size() != 0)
00124 {
00125 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00126 throw cet::exception(latest_exception_report_);
00127 }
00128 else
00129 {
00130 fragment_ids_.emplace_back(fragment_id);
00131 }
00132 }
00133
00134 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00135
00136 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00137 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00138
00139 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00140 if (modeString == "single" || modeString == "Single")
00141 {
00142 mode_ = RequestMode::Single;
00143 }
00144 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00145 {
00146 mode_ = RequestMode::Buffer;
00147 }
00148 else if (modeString == "window" || modeString == "Window")
00149 {
00150 mode_ = RequestMode::Window;
00151 }
00152 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00153 {
00154 mode_ = RequestMode::Ignored;
00155 }
00156 TLOG_DEBUG("CommandableFragmentGenerator") << "Request mode is " << printMode_() << TLOG_ENDL;
00157
00158 if (mode_ != RequestMode::Ignored)
00159 {
00160 if (!useDataThread_)
00161 {
00162 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00163 throw cet::exception(latest_exception_report_);
00164 }
00165 requestReceiver_.reset(new RequestReceiver(ps));
00166 }
00167 }
00168
00169 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00170 {
00171 joinThreads();
00172 }
00173
00174 void artdaq::CommandableFragmentGenerator::joinThreads()
00175 {
00176 should_stop_ = true;
00177 force_stop_ = true;
00178 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining dataThread" << TLOG_ENDL;
00179 if (dataThread_.joinable()) dataThread_.join();
00180 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining monitoringThread" << TLOG_ENDL;
00181 if (monitoringThread_.joinable()) monitoringThread_.join();
00182 requestReceiver_.reset(nullptr);
00183 }
00184
00185 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00186 {
00187 bool result = true;
00188
00189 if (check_stop()) usleep(sleep_on_stop_us_);
00190 if (exception() || force_stop_) return false;
00191
00192 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00193 {
00194 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Checking whether to collect Monitoring Data" << TLOG_ENDL;
00195 auto now = std::chrono::steady_clock::now();
00196
00197 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00198 {
00199 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Collecting Monitoring Data" << TLOG_ENDL;
00200 isHardwareOK_ = checkHWStatus_();
00201 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
00202 lastMonitoringCall_ = now;
00203 }
00204 }
00205
00206 try
00207 {
00208 std::lock_guard<std::mutex> lk(mutex_);
00209 if (useDataThread_)
00210 {
00211 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling applyRequests" << TLOG_ENDL;
00212 result = applyRequests(output);
00213 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with applyRequests" << TLOG_ENDL;
00214
00215 if (exception())
00216 {
00217 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00218 }
00219 }
00220 else
00221 {
00222 if (!isHardwareOK_)
00223 {
00224 TLOG_ERROR("CommandableFragmentGenerator") << "Stopping CFG because the hardware reports bad status!" << TLOG_ENDL;
00225 return false;
00226 }
00227 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
00228 try
00229 {
00230 result = getNext_(output);
00231 }
00232 catch (...)
00233 {
00234 throw;
00235 }
00236 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
00237 }
00238 }
00239 catch (const cet::exception& e)
00240 {
00241 latest_exception_report_ = "cet::exception caught in getNext(): ";
00242 latest_exception_report_.append(e.what());
00243 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: cet::exception caught: " << e << TLOG_ENDL;
00244 set_exception(true);
00245 return false;
00246 }
00247 catch (const boost::exception& e)
00248 {
00249 latest_exception_report_ = "boost::exception caught in getNext(): ";
00250 latest_exception_report_.append(boost::diagnostic_information(e));
00251 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: boost::exception caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
00252 set_exception(true);
00253 return false;
00254 }
00255 catch (const std::exception& e)
00256 {
00257 latest_exception_report_ = "std::exception caught in getNext(): ";
00258 latest_exception_report_.append(e.what());
00259 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: std::exception caught: " << e.what() << TLOG_ENDL;
00260 set_exception(true);
00261 return false;
00262 }
00263 catch (...)
00264 {
00265 latest_exception_report_ = "Unknown exception caught in getNext().";
00266 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: unknown exception caught" << TLOG_ENDL;
00267 set_exception(true);
00268 return false;
00269 }
00270
00271 if (!result)
00272 {
00273 TLOG_DEBUG("getNext") << "stopped " << TLOG_ENDL;
00274 }
00275
00276 return result;
00277 }
00278
00279 bool artdaq::CommandableFragmentGenerator::check_stop()
00280 {
00281 TLOG_ARB(14, "CommandableFragmentGeneraotr") << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) << TLOG_ENDL;
00282
00283 if (!should_stop()) return false;
00284 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00285 if (force_stop_) return true;
00286
00287
00288 return !requestReceiver_->isRunning();
00289 }
00290
00291 int artdaq::CommandableFragmentGenerator::fragment_id() const
00292 {
00293 if (fragment_ids_.size() != 1)
00294 {
00295 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00296 }
00297 else
00298 {
00299 return fragment_ids_[0];
00300 }
00301 }
00302
00303 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00304 {
00305 if (force || mode_ == RequestMode::Ignored)
00306 {
00307 return ev_counter_.fetch_add(step);
00308 }
00309 return ev_counter_.load();
00310 }
00311
00312 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00313 {
00314 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00315
00316 timeout_ = timeout;
00317 timestamp_ = timestamp;
00318 ev_counter_.store(1);
00319 should_stop_.store(false);
00320 exception_.store(false);
00321 run_number_ = run;
00322 subrun_number_ = 1;
00323 latest_exception_report_ = "none";
00324 dataBuffer_.clear();
00325 last_window_send_time_set_ = false;
00326
00327 start();
00328
00329 std::unique_lock<std::mutex> lk(mutex_);
00330 if (useDataThread_) startDataThread();
00331 if (useMonitoringThread_) startMonitoringThread();
00332 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00333 }
00334
00335 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00336 {
00337 TLOG_DEBUG("CommandableFragmentGenerator") << "Stop Command received." << TLOG_ENDL;
00338
00339 timeout_ = timeout;
00340 timestamp_ = timestamp;
00341
00342 stopNoMutex();
00343 should_stop_.store(true);
00344 std::unique_lock<std::mutex> lk(mutex_);
00345 stop();
00346 }
00347
00348 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00349 {
00350 timeout_ = timeout;
00351 timestamp_ = timestamp;
00352
00353 pauseNoMutex();
00354 should_stop_.store(true);
00355 std::unique_lock<std::mutex> lk(mutex_);
00356
00357 pause();
00358 }
00359
00360 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00361 {
00362 timeout_ = timeout;
00363 timestamp_ = timestamp;
00364
00365 subrun_number_ += 1;
00366 should_stop_ = false;
00367
00368 dataBuffer_.clear();
00369
00370
00371 resume();
00372
00373 std::unique_lock<std::mutex> lk(mutex_);
00374 if (useDataThread_) startDataThread();
00375 if (useMonitoringThread_) startMonitoringThread();
00376 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00377 }
00378
00379 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00380 {
00381 std::lock_guard<std::mutex> lk(mutex_);
00382
00383
00384
00385
00386
00387
00388 std::string childReport = reportSpecific(which);
00389 if (childReport.length() > 0) { return childReport; }
00390
00391
00392 if (which == "latest_exception")
00393 {
00394 return latest_exception_report_;
00395 }
00396
00397
00398 childReport = report();
00399 if (childReport.length() > 0) { return childReport; }
00400
00401
00402 std::string tmpString = "The \"" + which + "\" command is not ";
00403 tmpString.append("currently supported by the ");
00404 tmpString.append(metricsReportingInstanceName());
00405 tmpString.append(" fragment generator.");
00406 return tmpString;
00407 }
00408
00409
00410 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00411 {
00412 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00413 }
00414
00415 void artdaq::CommandableFragmentGenerator::pause()
00416 {
00417 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00418 }
00419
00420 void artdaq::CommandableFragmentGenerator::resume()
00421 {
00422 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00423 }
00424
00425 std::string artdaq::CommandableFragmentGenerator::report()
00426 {
00427 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00428 return "";
00429 }
00430
00431 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00432 {
00433 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00434 return "";
00435 }
00436
00437 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00438 {
00439 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00440 return true;
00441 }
00442
00443 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
00444 {
00445 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
00446 return true;
00447 }
00448
00449 void artdaq::CommandableFragmentGenerator::startDataThread()
00450 {
00451 if (dataThread_.joinable()) dataThread_.join();
00452 TLOG_INFO("CommandableFragmentGenerator") << "Starting Data Receiver Thread" << TLOG_ENDL;
00453 dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
00454 }
00455
00456 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00457 {
00458 if (monitoringThread_.joinable()) monitoringThread_.join();
00459 TLOG_INFO("CommandableFragmentGenerator") << "Starting Hardware Monitoring Thread" << TLOG_ENDL;
00460 monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00461 }
00462
00463 std::string artdaq::CommandableFragmentGenerator::printMode_()
00464 {
00465 switch (mode_)
00466 {
00467 case RequestMode::Single:
00468 return "Single";
00469 case RequestMode::Buffer:
00470 return "Buffer";
00471 case RequestMode::Window:
00472 return "Window";
00473 case RequestMode::Ignored:
00474 return "Ignored";
00475 }
00476
00477 return "ERROR";
00478 }
00479
00480 void artdaq::CommandableFragmentGenerator::getDataLoop()
00481 {
00482 data_thread_running_ = true;
00483 while (!force_stop_)
00484 {
00485 if (!isHardwareOK_)
00486 {
00487 TLOG_DEBUG("CommandableFragmentGenerator") << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" << TLOG_ENDL;
00488 data_thread_running_ = false;
00489 return;
00490 }
00491
00492 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: calling getNext_" << TLOG_ENDL;
00493
00494 bool data = false;
00495 auto startdata = std::chrono::steady_clock::now();
00496
00497 try
00498 {
00499 data = getNext_(newDataBuffer_);
00500 }
00501 catch (...)
00502 {
00503 ExceptionHandler(ExceptionHandlerRethrow::no,
00504 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00505 set_exception(true);
00506
00507 data_thread_running_ = false;
00508 return;
00509 }
00510
00511 if (metricMan)
00512 {
00513 metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
00514 }
00515
00516 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00517 {
00518 usleep(sleep_on_no_data_us_);
00519 }
00520
00521 TLOG_ARB(15, "CommandableFragmentGenerator") << "Waiting for data buffer ready" << TLOG_ENDL;
00522 if (!waitForDataBufferReady()) return;
00523 TLOG_ARB(15, "CommandableFragmentGenerator") << "Done waiting for data buffer ready" << TLOG_ENDL;
00524
00525 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: processing data" << TLOG_ENDL;
00526 if (data && !force_stop_)
00527 {
00528 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00529 switch (mode_)
00530 {
00531 case RequestMode::Single:
00532
00533 while (newDataBuffer_.size() >= fragment_ids_.size())
00534 {
00535 dataBuffer_.clear();
00536 auto it = newDataBuffer_.begin();
00537 std::advance(it, fragment_ids_.size());
00538 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00539 }
00540 break;
00541 case RequestMode::Buffer:
00542 case RequestMode::Ignored:
00543 case RequestMode::Window:
00544 default:
00545
00546 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00547 break;
00548 }
00549 getDataBufferStats();
00550 }
00551
00552 {
00553 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00554 if (dataBuffer_.size() > 0)
00555 {
00556 dataCondition_.notify_all();
00557 }
00558 }
00559 if (!data || force_stop_)
00560 {
00561 TLOG_INFO("CommandableFragmentGenerator") << "Data flow has stopped. Ending data collection thread" << TLOG_ENDL;
00562 data_thread_running_ = false;
00563 return;
00564 }
00565 }
00566 }
00567
00568 bool artdaq::CommandableFragmentGenerator::waitForDataBufferReady()
00569 {
00570 auto startwait = std::chrono::steady_clock::now();
00571 auto first = true;
00572 auto lastwaittime = 0ULL;
00573 while (dataBufferIsTooLarge())
00574 {
00575 if (should_stop())
00576 {
00577 TLOG_DEBUG("CommandableFragmentGenerator") << "Run ended while waiting for buffer to shrink!" << TLOG_ENDL;
00578 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00579 getDataBufferStats();
00580 dataCondition_.notify_all();
00581 data_thread_running_ = false;
00582 return false;
00583 }
00584 auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
00585
00586 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00587 {
00588 TLOG_WARNING("CommandableFragmentGenerator") << "Bad Omen: Data Buffer has exceeded its size limits. "
00589 << "(seq_id=" << ev_counter()
00590 << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
00591 << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" << TLOG_ENDL;
00592 TLOG_TRACE("CommandableFragmentGenerator") << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" << TLOG_ENDL;
00593 first = false;
00594 }
00595 if (waittime % 5 && waittime != lastwaittime)
00596 {
00597 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" << TLOG_ENDL;
00598 }
00599 lastwaittime = waittime;
00600 usleep(1000);
00601 }
00602 return true;
00603 }
00604
00605 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00606 {
00607 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00608 }
00609
00610 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00611 {
00613 dataBufferDepthFragments_ = dataBuffer_.size();
00614 size_t acc = 0;
00615 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Calculating buffer size" << TLOG_ENDL;
00616 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00617 {
00618 if (i->get() != nullptr)
00619 {
00620 acc += (*i)->sizeBytes();
00621 }
00622 }
00623 dataBufferDepthBytes_ = acc;
00624
00625 if (metricMan)
00626 {
00627 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Sending Metrics" << TLOG_ENDL;
00628 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00629 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00630 }
00631 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00632 << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) << TLOG_ENDL;
00633 }
00634
00635 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00636 {
00637 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00638 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00639 if (dataBufferDepthFragments_ > 0)
00640 {
00641 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00642 {
00643
00644 while (dataBufferIsTooLarge())
00645 {
00646 dataBuffer_.erase(dataBuffer_.begin());
00647 getDataBufferStats();
00648 }
00649 if (dataBuffer_.size() > 0)
00650 {
00651 TLOG_ARB(17, "CommandableFragmentGenerator") << "Determining if Fragments can be dropped from data buffer" << TLOG_ENDL;
00652 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00653 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00654 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00655 {
00656 if ((*it)->timestamp() < min)
00657 {
00658 it = dataBuffer_.erase(it);
00659 }
00660 else
00661 {
00662 ++it;
00663 }
00664 }
00665 getDataBufferStats();
00666 }
00667 }
00668 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00669 {
00670
00671 while (dataBuffer_.size() > fragment_ids_.size())
00672 {
00673 dataBuffer_.erase(dataBuffer_.begin());
00674 }
00675 }
00676 }
00677 }
00678
00679 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00680 {
00681 while (!force_stop_)
00682 {
00683 if (should_stop() || monitoringInterval_ <= 0)
00684 {
00685 TLOG_DEBUG("CommandableFragmentGenerator") << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00686 << " and monitoringInterval is " << monitoringInterval_ << ", returning" << TLOG_ENDL;
00687 return;
00688 }
00689 TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" << TLOG_ENDL;
00690
00691 auto now = std::chrono::steady_clock::now();
00692 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00693 {
00694 isHardwareOK_ = checkHWStatus_();
00695 TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
00696 lastMonitoringCall_ = now;
00697 }
00698 usleep(monitoringInterval_ / 10);
00699 }
00700 }
00701
00702 void artdaq::CommandableFragmentGenerator::applyRequestsIgnoredMode(artdaq::FragmentPtrs& frags)
00703 {
00704
00705 TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Ignored; Copying data to output" << TLOG_ENDL;
00706 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00707 dataBuffer_.clear();
00708 }
00709
00710 void artdaq::CommandableFragmentGenerator::applyRequestsSingleMode(artdaq::FragmentPtrs& frags)
00711 {
00712
00713 auto requests = requestReceiver_->GetRequests();
00714 while (requests.size() > 1) {
00715
00716 requestReceiver_->RemoveRequest(requests.begin()->first);
00717 requests.erase(requests.begin());
00718 }
00719 sendEmptyFragments(frags, requests);
00720
00721
00722 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00723
00724 if (dataBuffer_.size() > 0)
00725 {
00726 TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Single; Sending copy of last event" << TLOG_ENDL;
00727 for (auto& fragptr : dataBuffer_)
00728 {
00729
00730 auto frag = fragptr.get();
00731 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00732 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00733 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00734 newfrag->setTimestamp(requests[ev_counter()]);
00735 newfrag->setSequenceID(ev_counter());
00736 frags.push_back(std::move(newfrag));
00737 }
00738 }
00739 else
00740 {
00741 sendEmptyFragment(frags, ev_counter(), "No data for");
00742 }
00743 requestReceiver_->RemoveRequest(ev_counter());
00744 ev_counter_inc(1, true);
00745 }
00746
00747 void artdaq::CommandableFragmentGenerator::applyRequestsBufferMode(artdaq::FragmentPtrs& frags)
00748 {
00749
00750 auto requests = requestReceiver_->GetRequests();
00751 while (requests.size() > 1) {
00752
00753 requestReceiver_->RemoveRequest(requests.begin()->first);
00754 requests.erase(requests.begin());
00755 }
00756 sendEmptyFragments(frags, requests);
00757
00758
00759 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00760
00761 TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered Fragments" << TLOG_ENDL;
00762 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00763 frags.back()->setTimestamp(requests[ev_counter()]);
00764 ContainerFragmentLoader cfl(*frags.back());
00765 cfl.set_missing_data(false);
00766
00767
00768
00769 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00770 {
00771 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
00772 cfl.addFragment(*it);
00773 it = dataBuffer_.erase(it);
00774 }
00775 requestReceiver_->RemoveRequest(ev_counter());
00776 ev_counter_inc(1, true);
00777 }
00778
00779 void artdaq::CommandableFragmentGenerator::applyRequestsWindowMode(artdaq::FragmentPtrs& frags)
00780 {
00781 TLOG(10) << "applyRequestsWindowMode BEGIN";
00782 if (!last_window_send_time_set_)
00783 {
00784 last_window_send_time_ = std::chrono::steady_clock::now();
00785 last_window_send_time_set_ = true;
00786 }
00787
00788 auto requests = requestReceiver_->GetRequests();
00789 bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
00790 [this](decltype(requests)::value_type& request) {
00791 return request.first == ev_counter(); });
00792
00793 if (missing_request_)
00794 {
00795 if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
00796 {
00797 TLOG_ERROR("CommandableFragmentGenerator") << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
00798 << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
00799 << " Sending Empty Fragments for missing requests!" << TLOG_ENDL;
00800 sendEmptyFragments(frags, requests);
00801
00802 missing_request_ = false;
00803 missing_request_time_ = decltype(missing_request_time_)::max();
00804 }
00805 else if (now_have_desired_request) {
00806 missing_request_ = false;
00807 missing_request_time_ = decltype(missing_request_time_)::max();
00808 }
00809 }
00810
00811 TLOG(10) << "applyRequestsWindowMode: Starting request processing";
00812 for (auto req = requests.begin(); req != requests.end();)
00813 {
00814 while (req->first < ev_counter() && requests.size() > 0)
00815 {
00816 TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
00817 requestReceiver_->RemoveRequest(req->first);
00818 req = requests.erase(req);
00819 }
00820 if (requests.size() == 0) break;
00821 if (req->first > ev_counter())
00822 {
00823 if (!missing_request_)
00824 {
00825 missing_request_ = true;
00826 missing_request_time_ = std::chrono::steady_clock::now();
00827 }
00828 }
00829 auto ts = req->second;
00830 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) << TLOG_ENDL;
00831 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00832 Fragment::timestamp_t max = min + windowWidth_;
00833 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
00834 << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" << TLOG_ENDL;
00835 bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
00836 bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
00837 if (windowTimeout)
00838 {
00839 TLOG_WARNING("CommandableFragmentGenerator") << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
00840 << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
00841 << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
00842 << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
00843 << "(> " << std::to_string(window_close_timeout_us_) << " us)." << TLOG_ENDL;
00844
00845 if (missing_request_) {
00846 TLOG_ERROR("CommandableFragmentGenerator") << "A Window timeout has occurred while there are pending requests. Sending empties." << TLOG_ENDL;
00847 sendEmptyFragments(frags, requests);
00848 }
00849 }
00850 if (windowClosed || !data_thread_running_ || windowTimeout)
00851 {
00852 TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered or Window-requested Fragments" << TLOG_ENDL;
00853 frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
00854 frags.back()->setTimestamp(ts);
00855 ContainerFragmentLoader cfl(*frags.back());
00856
00857 if (!windowClosed) cfl.set_missing_data(true);
00858 if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
00859 {
00860 TLOG_DEBUG("CommandableFragmentGenerator") << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" << TLOG_ENDL;
00861 cfl.set_missing_data(true);
00862 }
00863
00864
00865
00866 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00867 {
00868 Fragment::timestamp_t fragT = (*it)->timestamp();
00869 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00870 {
00871 ++it;
00872 continue;
00873 }
00874
00875 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
00876 cfl.addFragment(*it);
00877
00878 if (uniqueWindows_)
00879 {
00880 it = dataBuffer_.erase(it);
00881 }
00882 else
00883 {
00884 ++it;
00885 }
00886 }
00887 if (req->first == ev_counter())
00888 {
00889 ev_counter_inc(1, true);
00890 while (windows_sent_ooo_.count(ev_counter()))
00891 {
00892 TLOG_ARB(9, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
00893 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
00894 ev_counter_inc(1, true);
00895 }
00896 }
00897 else
00898 {
00899 windows_sent_ooo_.insert(req->first);
00900 }
00901 requestReceiver_->RemoveRequest(req->first);
00902 req = requests.erase(req);
00903 last_window_send_time_ = std::chrono::steady_clock::now();
00904 }
00905 else
00906 {
00907 ++req;
00908 }
00909 }
00910 }
00911
00912 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
00913 {
00914 if (check_stop() || exception())
00915 {
00916 return false;
00917 }
00918
00919
00920 if (mode_ == RequestMode::Ignored)
00921 {
00922 while (dataBufferDepthFragments_ <= 0)
00923 {
00924 if (check_stop() || exception() || !isHardwareOK_) return false;
00925 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00926 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
00927 }
00928 }
00929 else
00930 {
00931 if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
00932 checkDataBuffer();
00933
00934
00935 auto counter = 0;
00936
00937 while (requestReceiver_->size() == 0 && counter < 100)
00938 {
00939 if (check_stop() || exception()) return false;
00940
00941 checkDataBuffer();
00942
00943 requestReceiver_->WaitForRequests(10);
00944 counter++;
00945 }
00946 }
00947
00948 {
00949 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
00950
00951 switch (mode_)
00952 {
00953 case RequestMode::Single:
00954 applyRequestsSingleMode(frags);
00955 break;
00956 case RequestMode::Window:
00957 applyRequestsWindowMode(frags);
00958 break;
00959 case RequestMode::Buffer:
00960 applyRequestsBufferMode(frags);
00961 break;
00962 case RequestMode::Ignored:
00963 default:
00964 applyRequestsIgnoredMode(frags);
00965 break;
00966 }
00967
00968 getDataBufferStats();
00969 }
00970
00971 if (frags.size() > 0)
00972 TLOG_ARB(9, "CommandableFragmentGenerator") << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." << TLOG_ENDL;
00973 return true;
00974 }
00975
00976 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
00977 {
00978 TLOG_WARNING("CommandableFragmentGenerator") << desc << " sequence ID " << seqId << ", sending empty fragment" << TLOG_ENDL;
00979 for (auto fid : fragment_ids_)
00980 {
00981 auto frag = new Fragment();
00982 frag->setSequenceID(seqId);
00983 frag->setFragmentID(fid);
00984 frag->setSystemType(Fragment::EmptyFragmentType);
00985 frags.emplace_back(FragmentPtr(frag));
00986 }
00987 return true;
00988 }
00989
00990 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
00991 {
00992 if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
00993
00994 if (requests.size() > 0) {
00995 TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first << TLOG_ENDL;
00996 while (requests.begin()->first > ev_counter())
00997 {
00998 sendEmptyFragment(frags, ev_counter(), "Missed request for");
00999 ev_counter_inc(1, true);
01000 }
01001 }
01002 else if (windows_sent_ooo_.size() > 0)
01003 {
01004 TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() << TLOG_ENDL;
01005 while (*windows_sent_ooo_.begin() > ev_counter())
01006 {
01007 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01008 ev_counter_inc(1, true);
01009 }
01010 }
01011 while (windows_sent_ooo_.count(ev_counter()))
01012 {
01013 TLOG_ARB(19, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
01014 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
01015 ev_counter_inc(1, true);
01016 }
01017 }