00001 #define TRACE_NAME "CommandableFragmentGenerator"
00002 #include "tracemf.h"
00003
00004 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00005
00006 #include <boost/exception/all.hpp>
00007 #include <boost/throw_exception.hpp>
00008
00009 #include <limits>
00010 #include <iterator>
00011
00012 #include "canvas/Utilities/Exception.h"
00013 #include "cetlib_except/exception.h"
00014 #include "fhiclcpp/ParameterSet.h"
00015
00016 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00017 #include "artdaq-core/Data/Fragment.hh"
00018 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00019 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00020 #include "artdaq-core/Utilities/TimeUtils.hh"
00021
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <iterator>
00025 #include <iostream>
00026 #include <iomanip>
00027 #include <algorithm>
00028 #include <sys/poll.h>
00029 #include "artdaq/DAQdata/TCPConnect.hh"
00030
00031 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00032 : mutex_()
00033 , requestReceiver_(new RequestReceiver())
00034 , windowOffset_(0)
00035 , windowWidth_(0)
00036 , staleTimeout_(Fragment::InvalidTimestamp)
00037 , expectedType_(Fragment::EmptyFragmentType)
00038 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00039 , uniqueWindows_(true)
00040 , missing_request_(true)
00041 , missing_request_time_()
00042 , last_window_send_time_()
00043 , last_window_send_time_set_(false)
00044 , windows_sent_ooo_()
00045 , missing_request_window_timeout_us_(1000000)
00046 , window_close_timeout_us_(2000000)
00047 , useDataThread_(false)
00048 , sleep_on_no_data_us_(0)
00049 , data_thread_running_(false)
00050 , dataBufferDepthFragments_(0)
00051 , dataBufferDepthBytes_(0)
00052 , maxDataBufferDepthFragments_(1000)
00053 , maxDataBufferDepthBytes_(1000)
00054 , useMonitoringThread_(false)
00055 , monitoringInterval_(0)
00056 , lastMonitoringCall_()
00057 , isHardwareOK_(true)
00058 , dataBuffer_()
00059 , newDataBuffer_()
00060 , run_number_(-1)
00061 , subrun_number_(-1)
00062 , timeout_(std::numeric_limits<uint64_t>::max())
00063 , timestamp_(std::numeric_limits<uint64_t>::max())
00064 , should_stop_(false)
00065 , exception_(false)
00066 , force_stop_(false)
00067 , latest_exception_report_("none")
00068 , ev_counter_(1)
00069 , board_id_(-1)
00070 , instance_name_for_metrics_("FragmentGenerator")
00071 , sleep_on_stop_us_(0)
00072 {}
00073
00074 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00075 : mutex_()
00076 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00077 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00078 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00079 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00080 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00081 , missing_request_(false)
00082 , missing_request_time_(decltype(missing_request_time_)::max())
00083 , last_window_send_time_(decltype(last_window_send_time_)::max())
00084 , last_window_send_time_set_(false)
00085 , windows_sent_ooo_()
00086 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
00087 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00088 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00089 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00090 , data_thread_running_(false)
00091 , dataBufferDepthFragments_(0)
00092 , dataBufferDepthBytes_(0)
00093 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00094 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00095 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00096 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00097 , lastMonitoringCall_()
00098 , isHardwareOK_(true)
00099 , dataBuffer_()
00100 , newDataBuffer_()
00101 , run_number_(-1)
00102 , subrun_number_(-1)
00103 , timeout_(std::numeric_limits<uint64_t>::max())
00104 , timestamp_(std::numeric_limits<uint64_t>::max())
00105 , should_stop_(false)
00106 , exception_(false)
00107 , force_stop_(false)
00108 , latest_exception_report_("none")
00109 , ev_counter_(1)
00110 , board_id_(-1)
00111 , sleep_on_stop_us_(0)
00112 {
00113 board_id_ = ps.get<int>("board_id");
00114 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00115
00116 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00117
00118 TLOG_TRACE("CommandableFragmentGenerator") << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" << TLOG_ENDL;
00119 int fragment_id = ps.get<int>("fragment_id", -99);
00120
00121 if (fragment_id != -99)
00122 {
00123 if (fragment_ids_.size() != 0)
00124 {
00125 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00126 throw cet::exception(latest_exception_report_);
00127 }
00128 else
00129 {
00130 fragment_ids_.emplace_back(fragment_id);
00131 }
00132 }
00133
00134 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00135
00136 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00137 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00138
00139 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00140 if (modeString == "single" || modeString == "Single")
00141 {
00142 mode_ = RequestMode::Single;
00143 }
00144 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00145 {
00146 mode_ = RequestMode::Buffer;
00147 }
00148 else if (modeString == "window" || modeString == "Window")
00149 {
00150 mode_ = RequestMode::Window;
00151 }
00152 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00153 {
00154 mode_ = RequestMode::Ignored;
00155 }
00156 TLOG_DEBUG("CommandableFragmentGenerator") << "Request mode is " << printMode_() << TLOG_ENDL;
00157
00158 if (mode_ != RequestMode::Ignored)
00159 {
00160 if (!useDataThread_)
00161 {
00162 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00163 throw cet::exception(latest_exception_report_);
00164 }
00165 requestReceiver_.reset(new RequestReceiver(ps));
00166 }
00167 }
00168
00169 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00170 {
00171 joinThreads();
00172 }
00173
00174 void artdaq::CommandableFragmentGenerator::joinThreads()
00175 {
00176 should_stop_ = true;
00177 force_stop_ = true;
00178 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining dataThread" << TLOG_ENDL;
00179 if (dataThread_.joinable()) dataThread_.join();
00180 TLOG_DEBUG("CommandableFragmentGenerator") << "Joining monitoringThread" << TLOG_ENDL;
00181 if (monitoringThread_.joinable()) monitoringThread_.join();
00182 requestReceiver_.reset(nullptr);
00183 }
00184
00185 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00186 {
00187 bool result = true;
00188
00189 if (check_stop()) usleep(sleep_on_stop_us_);
00190 if (exception() || force_stop_) return false;
00191
00192 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00193 {
00194 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Checking whether to collect Monitoring Data" << TLOG_ENDL;
00195 auto now = std::chrono::steady_clock::now();
00196
00197 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00198 {
00199 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Collecting Monitoring Data" << TLOG_ENDL;
00200 isHardwareOK_ = checkHWStatus_();
00201 TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
00202 lastMonitoringCall_ = now;
00203 }
00204 }
00205
00206 try
00207 {
00208 std::lock_guard<std::mutex> lk(mutex_);
00209 if (useDataThread_)
00210 {
00211 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling applyRequests" << TLOG_ENDL;
00212 result = applyRequests(output);
00213 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with applyRequests" << TLOG_ENDL;
00214
00215 if (exception())
00216 {
00217 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00218 }
00219 }
00220 else
00221 {
00222 if (!isHardwareOK_)
00223 {
00224 TLOG_ERROR("CommandableFragmentGenerator") << "Stopping CFG because the hardware reports bad status!" << TLOG_ENDL;
00225 return false;
00226 }
00227 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
00228 try
00229 {
00230 result = getNext_(output);
00231 }
00232 catch (...)
00233 {
00234 throw;
00235 }
00236 TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
00237 }
00238 }
00239 catch (const cet::exception& e)
00240 {
00241 latest_exception_report_ = "cet::exception caught in getNext(): ";
00242 latest_exception_report_.append(e.what());
00243 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: cet::exception caught: " << e << TLOG_ENDL;
00244 set_exception(true);
00245 return false;
00246 }
00247 catch (const boost::exception& e)
00248 {
00249 latest_exception_report_ = "boost::exception caught in getNext(): ";
00250 latest_exception_report_.append(boost::diagnostic_information(e));
00251 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: boost::exception caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
00252 set_exception(true);
00253 return false;
00254 }
00255 catch (const std::exception& e)
00256 {
00257 latest_exception_report_ = "std::exception caught in getNext(): ";
00258 latest_exception_report_.append(e.what());
00259 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: std::exception caught: " << e.what() << TLOG_ENDL;
00260 set_exception(true);
00261 return false;
00262 }
00263 catch (...)
00264 {
00265 latest_exception_report_ = "Unknown exception caught in getNext().";
00266 TLOG_ERROR("CommandableFragmentGenerator") << "getNext: unknown exception caught" << TLOG_ENDL;
00267 set_exception(true);
00268 return false;
00269 }
00270
00271 if (!result)
00272 {
00273 TLOG_DEBUG("getNext") << "stopped " << TLOG_ENDL;
00274 }
00275
00276 return result;
00277 }
00278
00279 bool artdaq::CommandableFragmentGenerator::check_stop()
00280 {
00281 TLOG_ARB(14, "CommandableFragmentGeneraotr") << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) << TLOG_ENDL;
00282
00283 if (!should_stop()) return false;
00284 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00285 if (force_stop_) return true;
00286
00287
00288 return !requestReceiver_->isRunning();
00289 }
00290
00291 int artdaq::CommandableFragmentGenerator::fragment_id() const
00292 {
00293 if (fragment_ids_.size() != 1)
00294 {
00295 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00296 }
00297 else
00298 {
00299 return fragment_ids_[0];
00300 }
00301 }
00302
00303 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00304 {
00305 if (force || mode_ == RequestMode::Ignored)
00306 {
00307 return ev_counter_.fetch_add(step);
00308 }
00309 return ev_counter_.load();
00310 }
00311
00312 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00313 {
00314 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00315
00316 timeout_ = timeout;
00317 timestamp_ = timestamp;
00318 ev_counter_.store(1);
00319 should_stop_.store(false);
00320 exception_.store(false);
00321 run_number_ = run;
00322 subrun_number_ = 1;
00323 latest_exception_report_ = "none";
00324 dataBuffer_.clear();
00325 last_window_send_time_set_ = false;
00326
00327 start();
00328
00329 std::unique_lock<std::mutex> lk(mutex_);
00330 if (useDataThread_) startDataThread();
00331 if (useMonitoringThread_) startMonitoringThread();
00332 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00333 }
00334
00335 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00336 {
00337 TLOG_DEBUG("CommandableFragmentGenerator") << "Stop Command received." << TLOG_ENDL;
00338
00339 timeout_ = timeout;
00340 timestamp_ = timestamp;
00341
00342 stopNoMutex();
00343 should_stop_.store(true);
00344 std::unique_lock<std::mutex> lk(mutex_);
00345 stop();
00346 }
00347
00348 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00349 {
00350 timeout_ = timeout;
00351 timestamp_ = timestamp;
00352
00353 pauseNoMutex();
00354 should_stop_.store(true);
00355 std::unique_lock<std::mutex> lk(mutex_);
00356
00357 pause();
00358 }
00359
00360 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00361 {
00362 timeout_ = timeout;
00363 timestamp_ = timestamp;
00364
00365 subrun_number_ += 1;
00366 should_stop_ = false;
00367
00368 dataBuffer_.clear();
00369
00370
00371 resume();
00372
00373 std::unique_lock<std::mutex> lk(mutex_);
00374 if (useDataThread_) startDataThread();
00375 if (useMonitoringThread_) startMonitoringThread();
00376 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00377 }
00378
00379 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00380 {
00381 std::lock_guard<std::mutex> lk(mutex_);
00382
00383
00384
00385
00386
00387
00388 std::string childReport = reportSpecific(which);
00389 if (childReport.length() > 0) { return childReport; }
00390
00391
00392 if (which == "latest_exception")
00393 {
00394 return latest_exception_report_;
00395 }
00396
00397
00398 childReport = report();
00399 if (childReport.length() > 0) { return childReport; }
00400
00401
00402 std::string tmpString = "The \"" + which + "\" command is not ";
00403 tmpString.append("currently supported by the ");
00404 tmpString.append(metricsReportingInstanceName());
00405 tmpString.append(" fragment generator.");
00406 return tmpString;
00407 }
00408
00409
00410 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00411 {
00412 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00413 }
00414
00415 void artdaq::CommandableFragmentGenerator::pause()
00416 {
00417 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00418 }
00419
00420 void artdaq::CommandableFragmentGenerator::resume()
00421 {
00422 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00423 }
00424
00425 std::string artdaq::CommandableFragmentGenerator::report()
00426 {
00427 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00428 return "";
00429 }
00430
00431 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00432 {
00433 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00434 return "";
00435 }
00436
00437 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00438 {
00439 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00440 return true;
00441 }
00442
00443 void artdaq::CommandableFragmentGenerator::startDataThread()
00444 {
00445 if (dataThread_.joinable()) dataThread_.join();
00446 TLOG_INFO("CommandableFragmentGenerator") << "Starting Data Receiver Thread" << TLOG_ENDL;
00447 dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
00448 }
00449
00450 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00451 {
00452 if (monitoringThread_.joinable()) monitoringThread_.join();
00453 TLOG_INFO("CommandableFragmentGenerator") << "Starting Hardware Monitoring Thread" << TLOG_ENDL;
00454 monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00455 }
00456
00457 std::string artdaq::CommandableFragmentGenerator::printMode_()
00458 {
00459 switch (mode_)
00460 {
00461 case RequestMode::Single:
00462 return "Single";
00463 case RequestMode::Buffer:
00464 return "Buffer";
00465 case RequestMode::Window:
00466 return "Window";
00467 case RequestMode::Ignored:
00468 return "Ignored";
00469 }
00470
00471 return "ERROR";
00472 }
00473
00474 void artdaq::CommandableFragmentGenerator::getDataLoop()
00475 {
00476 data_thread_running_ = true;
00477 while (!force_stop_)
00478 {
00479 if (!isHardwareOK_)
00480 {
00481 TLOG_DEBUG("CommandableFragmentGenerator") << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" << TLOG_ENDL;
00482 data_thread_running_ = false;
00483 return;
00484 }
00485
00486 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: calling getNext_" << TLOG_ENDL;
00487
00488 bool data = false;
00489 auto startdata = std::chrono::steady_clock::now();
00490
00491 try
00492 {
00493 data = getNext_(newDataBuffer_);
00494 }
00495 catch (...)
00496 {
00497 ExceptionHandler(ExceptionHandlerRethrow::no,
00498 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00499 set_exception(true);
00500
00501 data_thread_running_ = false;
00502 return;
00503 }
00504
00505 if (metricMan)
00506 {
00507 metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
00508 }
00509
00510 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00511 {
00512 usleep(sleep_on_no_data_us_);
00513 }
00514
00515 TLOG_ARB(15, "CommandableFragmentGenerator") << "Waiting for data buffer ready" << TLOG_ENDL;
00516 if (!waitForDataBufferReady()) return;
00517 TLOG_ARB(15, "CommandableFragmentGenerator") << "Done waiting for data buffer ready" << TLOG_ENDL;
00518
00519 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: processing data" << TLOG_ENDL;
00520 if (data && !force_stop_)
00521 {
00522 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00523 switch (mode_)
00524 {
00525 case RequestMode::Single:
00526
00527 while (newDataBuffer_.size() >= fragment_ids_.size())
00528 {
00529 dataBuffer_.clear();
00530 auto it = newDataBuffer_.begin();
00531 std::advance(it, fragment_ids_.size());
00532 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00533 }
00534 break;
00535 case RequestMode::Buffer:
00536 case RequestMode::Ignored:
00537 case RequestMode::Window:
00538 default:
00539
00540 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00541 break;
00542 }
00543 getDataBufferStats();
00544 }
00545
00546 {
00547 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00548 if (dataBuffer_.size() > 0)
00549 {
00550 dataCondition_.notify_all();
00551 }
00552 }
00553 if (!data || force_stop_)
00554 {
00555 TLOG_INFO("CommandableFragmentGenerator") << "Data flow has stopped. Ending data collection thread" << TLOG_ENDL;
00556 data_thread_running_ = false;
00557 return;
00558 }
00559 }
00560 }
00561
00562 bool artdaq::CommandableFragmentGenerator::waitForDataBufferReady()
00563 {
00564 auto startwait = std::chrono::steady_clock::now();
00565 auto first = true;
00566 auto lastwaittime = 0ULL;
00567 while (dataBufferIsTooLarge())
00568 {
00569 if (should_stop())
00570 {
00571 TLOG_DEBUG("CommandableFragmentGenerator") << "Run ended while waiting for buffer to shrink!" << TLOG_ENDL;
00572 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00573 getDataBufferStats();
00574 dataCondition_.notify_all();
00575 data_thread_running_ = false;
00576 return false;
00577 }
00578 auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
00579
00580 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00581 {
00582 TLOG_WARNING("CommandableFragmentGenerator") << "Bad Omen: Data Buffer has exceeded its size limits. "
00583 << "(seq_id=" << ev_counter()
00584 << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
00585 << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" << TLOG_ENDL;
00586 TLOG_TRACE("CommandableFragmentGenerator") << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" << TLOG_ENDL;
00587 first = false;
00588 }
00589 if (waittime % 5 && waittime != lastwaittime)
00590 {
00591 TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" << TLOG_ENDL;
00592 }
00593 lastwaittime = waittime;
00594 usleep(1000);
00595 }
00596 return true;
00597 }
00598
00599 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00600 {
00601 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00602 }
00603
00604 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00605 {
00607 dataBufferDepthFragments_ = dataBuffer_.size();
00608 size_t acc = 0;
00609 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Calculating buffer size" << TLOG_ENDL;
00610 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00611 {
00612 if (i->get() != nullptr)
00613 {
00614 acc += (*i)->sizeBytes();
00615 }
00616 }
00617 dataBufferDepthBytes_ = acc;
00618
00619 if (metricMan)
00620 {
00621 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Sending Metrics" << TLOG_ENDL;
00622 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00623 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00624 }
00625 TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00626 << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) << TLOG_ENDL;
00627 }
00628
00629 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00630 {
00631 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00632 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00633 if (dataBufferDepthFragments_ > 0)
00634 {
00635 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00636 {
00637
00638 while (dataBufferIsTooLarge())
00639 {
00640 dataBuffer_.erase(dataBuffer_.begin());
00641 getDataBufferStats();
00642 }
00643 if (dataBuffer_.size() > 0)
00644 {
00645 TLOG_ARB(17, "CommandableFragmentGenerator") << "Determining if Fragments can be dropped from data buffer" << TLOG_ENDL;
00646 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00647 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00648 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00649 {
00650 if ((*it)->timestamp() < min)
00651 {
00652 it = dataBuffer_.erase(it);
00653 }
00654 else
00655 {
00656 ++it;
00657 }
00658 }
00659 getDataBufferStats();
00660 }
00661 }
00662 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00663 {
00664
00665 while (dataBuffer_.size() > fragment_ids_.size())
00666 {
00667 dataBuffer_.erase(dataBuffer_.begin());
00668 }
00669 }
00670 }
00671 }
00672
00673 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00674 {
00675 while (!force_stop_)
00676 {
00677 if (should_stop() || monitoringInterval_ <= 0)
00678 {
00679 TLOG_DEBUG("CommandableFragmentGenerator") << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00680 << " and monitoringInterval is " << monitoringInterval_ << ", returning" << TLOG_ENDL;
00681 return;
00682 }
00683 TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" << TLOG_ENDL;
00684
00685 auto now = std::chrono::steady_clock::now();
00686 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00687 {
00688 isHardwareOK_ = checkHWStatus_();
00689 TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
00690 lastMonitoringCall_ = now;
00691 }
00692 usleep(monitoringInterval_ / 10);
00693 }
00694 }
00695
00696 void artdaq::CommandableFragmentGenerator::applyRequestsIgnoredMode(artdaq::FragmentPtrs& frags)
00697 {
00698
00699 TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Ignored; Copying data to output" << TLOG_ENDL;
00700 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00701 dataBuffer_.clear();
00702 }
00703
00704 void artdaq::CommandableFragmentGenerator::applyRequestsSingleMode(artdaq::FragmentPtrs& frags)
00705 {
00706
00707 auto requests = requestReceiver_->GetRequests();
00708 while (requests.size() > 1) {
00709
00710 requestReceiver_->RemoveRequest(requests.begin()->first);
00711 requests.erase(requests.begin());
00712 }
00713 sendEmptyFragments(frags, requests);
00714
00715
00716 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00717
00718 if (dataBuffer_.size() > 0)
00719 {
00720 TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Single; Sending copy of last event" << TLOG_ENDL;
00721 for (auto& fragptr : dataBuffer_)
00722 {
00723
00724 auto frag = fragptr.get();
00725 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00726 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00727 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00728 newfrag->setTimestamp(requests[ev_counter()]);
00729 newfrag->setSequenceID(ev_counter());
00730 frags.push_back(std::move(newfrag));
00731 }
00732 }
00733 else
00734 {
00735 sendEmptyFragment(frags, ev_counter(), "No data for");
00736 }
00737 requestReceiver_->RemoveRequest(ev_counter());
00738 ev_counter_inc(1, true);
00739 }
00740
00741 void artdaq::CommandableFragmentGenerator::applyRequestsBufferMode(artdaq::FragmentPtrs& frags)
00742 {
00743
00744 auto requests = requestReceiver_->GetRequests();
00745 while (requests.size() > 1) {
00746
00747 requestReceiver_->RemoveRequest(requests.begin()->first);
00748 requests.erase(requests.begin());
00749 }
00750 sendEmptyFragments(frags, requests);
00751
00752
00753 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00754
00755 TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered Fragments" << TLOG_ENDL;
00756 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00757 frags.back()->setTimestamp(requests[ev_counter()]);
00758 ContainerFragmentLoader cfl(*frags.back());
00759 cfl.set_missing_data(false);
00760
00761
00762
00763 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00764 {
00765 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
00766 cfl.addFragment(*it);
00767 it = dataBuffer_.erase(it);
00768 }
00769 requestReceiver_->RemoveRequest(ev_counter());
00770 ev_counter_inc(1, true);
00771 }
00772
00773 void artdaq::CommandableFragmentGenerator::applyRequestsWindowMode(artdaq::FragmentPtrs& frags)
00774 {
00775 TLOG(10) << "applyRequestsWindowMode BEGIN";
00776 if (!last_window_send_time_set_)
00777 {
00778 last_window_send_time_ = std::chrono::steady_clock::now();
00779 last_window_send_time_set_ = true;
00780 }
00781
00782 auto requests = requestReceiver_->GetRequests();
00783 bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
00784 [this](decltype(requests)::value_type& request) {
00785 return request.first == ev_counter(); });
00786
00787 if (missing_request_)
00788 {
00789 if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
00790 {
00791 TLOG_ERROR("CommandableFragmentGenerator") << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
00792 << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
00793 << " Sending Empty Fragments for missing requests!" << TLOG_ENDL;
00794 sendEmptyFragments(frags, requests);
00795
00796 missing_request_ = false;
00797 missing_request_time_ = decltype(missing_request_time_)::max();
00798 }
00799 else if (now_have_desired_request) {
00800 missing_request_ = false;
00801 missing_request_time_ = decltype(missing_request_time_)::max();
00802 }
00803 }
00804
00805 TLOG(10) << "applyRequestsWindowMode: Starting request processing";
00806 for (auto req = requests.begin(); req != requests.end();)
00807 {
00808 while (req->first < ev_counter() && requests.size() > 0)
00809 {
00810 TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
00811 requestReceiver_->RemoveRequest(req->first);
00812 req = requests.erase(req);
00813 }
00814 if (requests.size() == 0) break;
00815 if (req->first > ev_counter())
00816 {
00817 if (!missing_request_)
00818 {
00819 missing_request_ = true;
00820 missing_request_time_ = std::chrono::steady_clock::now();
00821 }
00822 }
00823 auto ts = req->second;
00824 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) << TLOG_ENDL;
00825 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00826 Fragment::timestamp_t max = min + windowWidth_;
00827 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
00828 << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" << TLOG_ENDL;
00829 bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
00830 bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
00831 if (windowTimeout)
00832 {
00833 TLOG_WARNING("CommandableFragmentGenerator") << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
00834 << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
00835 << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
00836 << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
00837 << "(> " << std::to_string(window_close_timeout_us_) << " us)." << TLOG_ENDL;
00838
00839 if (missing_request_) {
00840 TLOG_ERROR("CommandableFragmentGenerator") << "A Window timeout has occurred while there are pending requests. Sending empties." << TLOG_ENDL;
00841 sendEmptyFragments(frags, requests);
00842 }
00843 }
00844 if (windowClosed || !data_thread_running_ || windowTimeout)
00845 {
00846 TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered or Window-requested Fragments" << TLOG_ENDL;
00847 frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
00848 frags.back()->setTimestamp(ts);
00849 ContainerFragmentLoader cfl(*frags.back());
00850
00851 if (!windowClosed) cfl.set_missing_data(true);
00852 if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
00853 {
00854 TLOG_DEBUG("CommandableFragmentGenerator") << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" << TLOG_ENDL;
00855 cfl.set_missing_data(true);
00856 }
00857
00858
00859
00860 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00861 {
00862 Fragment::timestamp_t fragT = (*it)->timestamp();
00863 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00864 {
00865 ++it;
00866 continue;
00867 }
00868
00869 TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
00870 cfl.addFragment(*it);
00871
00872 if (uniqueWindows_)
00873 {
00874 it = dataBuffer_.erase(it);
00875 }
00876 else
00877 {
00878 ++it;
00879 }
00880 }
00881 if (req->first == ev_counter())
00882 {
00883 ev_counter_inc(1, true);
00884 while (windows_sent_ooo_.count(ev_counter()))
00885 {
00886 TLOG_ARB(9, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
00887 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
00888 ev_counter_inc(1, true);
00889 }
00890 }
00891 else
00892 {
00893 windows_sent_ooo_.insert(req->first);
00894 }
00895 requestReceiver_->RemoveRequest(req->first);
00896 req = requests.erase(req);
00897 last_window_send_time_ = std::chrono::steady_clock::now();
00898 }
00899 else
00900 {
00901 ++req;
00902 }
00903 }
00904 }
00905
00906 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
00907 {
00908 if (check_stop() || exception())
00909 {
00910 return false;
00911 }
00912
00913
00914 if (mode_ == RequestMode::Ignored)
00915 {
00916 while (dataBufferDepthFragments_ <= 0)
00917 {
00918 if (check_stop() || exception() || !isHardwareOK_) return false;
00919 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00920 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
00921 }
00922 }
00923 else
00924 {
00925 if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
00926 checkDataBuffer();
00927
00928
00929 auto counter = 0;
00930
00931 while (requestReceiver_->size() == 0 && counter < 100)
00932 {
00933 if (check_stop() || exception()) return false;
00934
00935 checkDataBuffer();
00936
00937 requestReceiver_->WaitForRequests(10);
00938 counter++;
00939 }
00940 }
00941
00942 {
00943 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
00944
00945 switch (mode_)
00946 {
00947 case RequestMode::Single:
00948 applyRequestsSingleMode(frags);
00949 break;
00950 case RequestMode::Window:
00951 applyRequestsWindowMode(frags);
00952 break;
00953 case RequestMode::Buffer:
00954 applyRequestsBufferMode(frags);
00955 break;
00956 case RequestMode::Ignored:
00957 default:
00958 applyRequestsIgnoredMode(frags);
00959 break;
00960 }
00961
00962 getDataBufferStats();
00963 }
00964
00965 if (frags.size() > 0)
00966 TLOG_ARB(9, "CommandableFragmentGenerator") << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." << TLOG_ENDL;
00967 return true;
00968 }
00969
00970 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
00971 {
00972 TLOG_WARNING("CommandableFragmentGenerator") << desc << " sequence ID " << seqId << ", sending empty fragment" << TLOG_ENDL;
00973 for (auto fid : fragment_ids_)
00974 {
00975 auto frag = new Fragment();
00976 frag->setSequenceID(seqId);
00977 frag->setFragmentID(fid);
00978 frag->setSystemType(Fragment::EmptyFragmentType);
00979 frags.emplace_back(FragmentPtr(frag));
00980 }
00981 return true;
00982 }
00983
00984 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
00985 {
00986 if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
00987
00988 if (requests.size() > 0) {
00989 TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first << TLOG_ENDL;
00990 while (requests.begin()->first > ev_counter())
00991 {
00992 sendEmptyFragment(frags, ev_counter(), "Missed request for");
00993 ev_counter_inc(1, true);
00994 }
00995 }
00996 else if (windows_sent_ooo_.size() > 0)
00997 {
00998 TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() << TLOG_ENDL;
00999 while (*windows_sent_ooo_.begin() > ev_counter())
01000 {
01001 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01002 ev_counter_inc(1, true);
01003 }
01004 }
01005 while (windows_sent_ooo_.count(ev_counter()))
01006 {
01007 TLOG_ARB(19, "CommandableFragmentGenerator") << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" << TLOG_ENDL;
01008 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
01009 ev_counter_inc(1, true);
01010 }
01011 }