00001 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
00002 #include "artdaq/DAQdata/Globals.hh"
00003
00004 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00005
00006 #include <boost/exception/all.hpp>
00007 #include <boost/throw_exception.hpp>
00008
00009 #include <limits>
00010 #include <iterator>
00011
00012 #include "canvas/Utilities/Exception.h"
00013 #include "cetlib_except/exception.h"
00014 #include "fhiclcpp/ParameterSet.h"
00015
00016 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00017 #include "artdaq-core/Data/Fragment.hh"
00018 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00019 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00020 #include "artdaq-core/Utilities/TimeUtils.hh"
00021
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <iterator>
00025 #include <iostream>
00026 #include <iomanip>
00027 #include <algorithm>
00028 #include <sys/poll.h>
00029 #include "artdaq/DAQdata/TCPConnect.hh"
00030
00031 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00032 : mutex_()
00033 , requestReceiver_(new RequestReceiver())
00034 , windowOffset_(0)
00035 , windowWidth_(0)
00036 , staleTimeout_(Fragment::InvalidTimestamp)
00037 , expectedType_(Fragment::EmptyFragmentType)
00038 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00039 , uniqueWindows_(true)
00040 , missing_request_(true)
00041 , missing_request_time_()
00042 , last_window_send_time_()
00043 , last_window_send_time_set_(false)
00044 , windows_sent_ooo_()
00045 , missing_request_window_timeout_us_(1000000)
00046 , window_close_timeout_us_(2000000)
00047 , useDataThread_(false)
00048 , sleep_on_no_data_us_(0)
00049 , data_thread_running_(false)
00050 , dataBufferDepthFragments_(0)
00051 , dataBufferDepthBytes_(0)
00052 , maxDataBufferDepthFragments_(1000)
00053 , maxDataBufferDepthBytes_(1000)
00054 , useMonitoringThread_(false)
00055 , monitoringInterval_(0)
00056 , lastMonitoringCall_()
00057 , isHardwareOK_(true)
00058 , dataBuffer_()
00059 , newDataBuffer_()
00060 , run_number_(-1)
00061 , subrun_number_(-1)
00062 , timeout_(std::numeric_limits<uint64_t>::max())
00063 , timestamp_(std::numeric_limits<uint64_t>::max())
00064 , should_stop_(false)
00065 , exception_(false)
00066 , force_stop_(false)
00067 , latest_exception_report_("none")
00068 , ev_counter_(1)
00069 , board_id_(-1)
00070 , instance_name_for_metrics_("FragmentGenerator")
00071 , sleep_on_stop_us_(0)
00072 {}
00073
00074 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00075 : mutex_()
00076 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00077 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00078 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00079 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00080 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00081 , missing_request_(false)
00082 , missing_request_time_(decltype(missing_request_time_)::max())
00083 , last_window_send_time_(decltype(last_window_send_time_)::max())
00084 , last_window_send_time_set_(false)
00085 , windows_sent_ooo_()
00086 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
00087 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00088 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00089 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00090 , data_thread_running_(false)
00091 , dataBufferDepthFragments_(0)
00092 , dataBufferDepthBytes_(0)
00093 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00094 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00095 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00096 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00097 , lastMonitoringCall_()
00098 , isHardwareOK_(true)
00099 , dataBuffer_()
00100 , newDataBuffer_()
00101 , run_number_(-1)
00102 , subrun_number_(-1)
00103 , timeout_(std::numeric_limits<uint64_t>::max())
00104 , timestamp_(std::numeric_limits<uint64_t>::max())
00105 , should_stop_(false)
00106 , exception_(false)
00107 , force_stop_(false)
00108 , latest_exception_report_("none")
00109 , ev_counter_(1)
00110 , board_id_(-1)
00111 , sleep_on_stop_us_(0)
00112 {
00113 board_id_ = ps.get<int>("board_id");
00114 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00115
00116 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00117
00118 TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" ;
00119 int fragment_id = ps.get<int>("fragment_id", -99);
00120
00121 if (fragment_id != -99)
00122 {
00123 if (fragment_ids_.size() != 0)
00124 {
00125 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00126 throw cet::exception(latest_exception_report_);
00127 }
00128 else
00129 {
00130 fragment_ids_.emplace_back(fragment_id);
00131 }
00132 }
00133
00134 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00135
00136 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00137 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00138
00139 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00140 if (modeString == "single" || modeString == "Single")
00141 {
00142 mode_ = RequestMode::Single;
00143 }
00144 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00145 {
00146 mode_ = RequestMode::Buffer;
00147 }
00148 else if (modeString == "window" || modeString == "Window")
00149 {
00150 mode_ = RequestMode::Window;
00151 }
00152 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00153 {
00154 mode_ = RequestMode::Ignored;
00155 }
00156 TLOG(TLVL_DEBUG) << "Request mode is " << printMode_() ;
00157
00158 if (mode_ != RequestMode::Ignored)
00159 {
00160 if (!useDataThread_)
00161 {
00162 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00163 throw cet::exception(latest_exception_report_);
00164 }
00165 requestReceiver_.reset(new RequestReceiver(ps));
00166 }
00167 }
00168
00169 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00170 {
00171 joinThreads();
00172 }
00173
00174 void artdaq::CommandableFragmentGenerator::joinThreads()
00175 {
00176 should_stop_ = true;
00177 force_stop_ = true;
00178 TLOG(TLVL_DEBUG) << "Joining dataThread" ;
00179 if (dataThread_.joinable()) dataThread_.join();
00180 TLOG(TLVL_DEBUG) << "Joining monitoringThread" ;
00181 if (monitoringThread_.joinable()) monitoringThread_.join();
00182 requestReceiver_.reset(nullptr);
00183 }
00184
00185 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00186 {
00187 bool result = true;
00188
00189 if (check_stop()) usleep(sleep_on_stop_us_);
00190 if (exception() || force_stop_) return false;
00191
00192 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00193 {
00194 TLOG(10) << "getNext: Checking whether to collect Monitoring Data" ;
00195 auto now = std::chrono::steady_clock::now();
00196
00197 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00198 {
00199 TLOG(10) << "getNext: Collecting Monitoring Data" ;
00200 isHardwareOK_ = checkHWStatus_();
00201 TLOG(10) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
00202 lastMonitoringCall_ = now;
00203 }
00204 }
00205
00206 try
00207 {
00208 std::lock_guard<std::mutex> lk(mutex_);
00209 if (useDataThread_)
00210 {
00211 TLOG(TLVL_TRACE) << "getNext: Calling applyRequests" ;
00212 result = applyRequests(output);
00213 TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
00214
00215 if (exception())
00216 {
00217 TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00218 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00219 }
00220 }
00221 else
00222 {
00223 if (!isHardwareOK_)
00224 {
00225 TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!" ;
00226 return false;
00227 }
00228 TLOG(TLVL_TRACE) << "getNext: Calling getNext_ " << std::to_string(ev_counter()) ;
00229 try
00230 {
00231 result = getNext_(output);
00232 }
00233 catch (...)
00234 {
00235 throw;
00236 }
00237 TLOG(TLVL_TRACE) << "getNext: Done with getNext_ " << std::to_string(ev_counter()) ;
00238 }
00239 }
00240 catch (const cet::exception& e)
00241 {
00242 latest_exception_report_ = "cet::exception caught in getNext(): ";
00243 latest_exception_report_.append(e.what());
00244 TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e ;
00245 set_exception(true);
00246 return false;
00247 }
00248 catch (const boost::exception& e)
00249 {
00250 latest_exception_report_ = "boost::exception caught in getNext(): ";
00251 latest_exception_report_.append(boost::diagnostic_information(e));
00252 TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e) ;
00253 set_exception(true);
00254 return false;
00255 }
00256 catch (const std::exception& e)
00257 {
00258 latest_exception_report_ = "std::exception caught in getNext(): ";
00259 latest_exception_report_.append(e.what());
00260 TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what() ;
00261 set_exception(true);
00262 return false;
00263 }
00264 catch (...)
00265 {
00266 latest_exception_report_ = "Unknown exception caught in getNext().";
00267 TLOG(TLVL_ERROR) << "getNext: unknown exception caught" ;
00268 set_exception(true);
00269 return false;
00270 }
00271
00272 if (!result)
00273 {
00274 TLOG(TLVL_DEBUG) << "stopped " ;
00275 }
00276
00277 return result;
00278 }
00279
00280 bool artdaq::CommandableFragmentGenerator::check_stop()
00281 {
00282 TLOG(14) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) ;
00283
00284 if (!should_stop()) return false;
00285 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00286 if (force_stop_) return true;
00287
00288
00289 return !requestReceiver_->isRunning();
00290 }
00291
00292 int artdaq::CommandableFragmentGenerator::fragment_id() const
00293 {
00294 if (fragment_ids_.size() != 1)
00295 {
00296 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00297 }
00298 else
00299 {
00300 return fragment_ids_[0];
00301 }
00302 }
00303
00304 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00305 {
00306 if (force || mode_ == RequestMode::Ignored)
00307 {
00308 return ev_counter_.fetch_add(step);
00309 }
00310 return ev_counter_.load();
00311 }
00312
00313 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00314 {
00315 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00316
00317 timeout_ = timeout;
00318 timestamp_ = timestamp;
00319 ev_counter_.store(1);
00320 missing_request_ = false;
00321 should_stop_.store(false);
00322 exception_.store(false);
00323 run_number_ = run;
00324 subrun_number_ = 1;
00325 latest_exception_report_ = "none";
00326 dataBuffer_.clear();
00327 last_window_send_time_set_ = false;
00328 windows_sent_ooo_.clear();
00329
00330 start();
00331
00332 std::unique_lock<std::mutex> lk(mutex_);
00333 if (useDataThread_) startDataThread();
00334 if (useMonitoringThread_) startMonitoringThread();
00335 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00336 }
00337
00338 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00339 {
00340 TLOG(TLVL_DEBUG) << "Stop Command received." ;
00341
00342 timeout_ = timeout;
00343 timestamp_ = timestamp;
00344 if (requestReceiver_ && requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
00345
00346 stopNoMutex();
00347 should_stop_.store(true);
00348 std::unique_lock<std::mutex> lk(mutex_);
00349 stop();
00350 TLOG(TLVL_DEBUG) << "Stop command complete.";
00351 }
00352
00353 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00354 {
00355 timeout_ = timeout;
00356 timestamp_ = timestamp;
00357 if (requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
00358
00359 pauseNoMutex();
00360 should_stop_.store(true);
00361 std::unique_lock<std::mutex> lk(mutex_);
00362
00363 pause();
00364 }
00365
00366 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00367 {
00368 timeout_ = timeout;
00369 timestamp_ = timestamp;
00370
00371 subrun_number_ += 1;
00372 should_stop_ = false;
00373
00374 dataBuffer_.clear();
00375
00376
00377 resume();
00378
00379 std::unique_lock<std::mutex> lk(mutex_);
00380 if (useDataThread_) startDataThread();
00381 if (useMonitoringThread_) startMonitoringThread();
00382 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00383 }
00384
00385 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00386 {
00387 std::lock_guard<std::mutex> lk(mutex_);
00388
00389
00390
00391
00392
00393
00394 std::string childReport = reportSpecific(which);
00395 if (childReport.length() > 0) { return childReport; }
00396
00397
00398 if (which == "latest_exception")
00399 {
00400 return latest_exception_report_;
00401 }
00402
00403
00404 childReport = report();
00405 if (childReport.length() > 0) { return childReport; }
00406
00407
00408 std::string tmpString = "The \"" + which + "\" command is not ";
00409 tmpString.append("currently supported by the ");
00410 tmpString.append(metricsReportingInstanceName());
00411 tmpString.append(" fragment generator.");
00412 return tmpString;
00413 }
00414
00415
00416 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00417 {
00418 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00419 }
00420
00421 void artdaq::CommandableFragmentGenerator::pause()
00422 {
00423 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00424 }
00425
00426 void artdaq::CommandableFragmentGenerator::resume()
00427 {
00428 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00429 }
00430
00431 std::string artdaq::CommandableFragmentGenerator::report()
00432 {
00433 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00434 return "";
00435 }
00436
00437 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00438 {
00439 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00440 return "";
00441 }
00442
00443 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00444 {
00445 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00446 return true;
00447 }
00448
00449 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
00450 {
00451 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
00452 return true;
00453 }
00454
00455 void artdaq::CommandableFragmentGenerator::startDataThread()
00456 {
00457 if (dataThread_.joinable()) dataThread_.join();
00458 TLOG(TLVL_INFO) << "Starting Data Receiver Thread" ;
00459 dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
00460 }
00461
00462 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00463 {
00464 if (monitoringThread_.joinable()) monitoringThread_.join();
00465 TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread" ;
00466 monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00467 }
00468
00469 std::string artdaq::CommandableFragmentGenerator::printMode_()
00470 {
00471 switch (mode_)
00472 {
00473 case RequestMode::Single:
00474 return "Single";
00475 case RequestMode::Buffer:
00476 return "Buffer";
00477 case RequestMode::Window:
00478 return "Window";
00479 case RequestMode::Ignored:
00480 return "Ignored";
00481 }
00482
00483 return "ERROR";
00484 }
00485
00486 void artdaq::CommandableFragmentGenerator::getDataLoop()
00487 {
00488 data_thread_running_ = true;
00489 while (!force_stop_)
00490 {
00491 if (!isHardwareOK_)
00492 {
00493 TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" ;
00494 data_thread_running_ = false;
00495 return;
00496 }
00497
00498 TLOG(13) << "getDataLoop: calling getNext_" ;
00499
00500 bool data = false;
00501 auto startdata = std::chrono::steady_clock::now();
00502
00503 try
00504 {
00505 data = getNext_(newDataBuffer_);
00506 }
00507 catch (...)
00508 {
00509 ExceptionHandler(ExceptionHandlerRethrow::no,
00510 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00511 set_exception(true);
00512
00513 data_thread_running_ = false;
00514 return;
00515 }
00516
00517 if (metricMan)
00518 {
00519 metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
00520 }
00521
00522 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00523 {
00524 usleep(sleep_on_no_data_us_);
00525 }
00526
00527 TLOG(15) << "Waiting for data buffer ready" ;
00528 if (!waitForDataBufferReady()) return;
00529 TLOG(15) << "Done waiting for data buffer ready" ;
00530
00531 TLOG(13) << "getDataLoop: processing data" ;
00532 if (data && !force_stop_)
00533 {
00534 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00535 switch (mode_)
00536 {
00537 case RequestMode::Single:
00538
00539 while (newDataBuffer_.size() >= fragment_ids_.size())
00540 {
00541 dataBuffer_.clear();
00542 auto it = newDataBuffer_.begin();
00543 std::advance(it, fragment_ids_.size());
00544 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00545 }
00546 break;
00547 case RequestMode::Buffer:
00548 case RequestMode::Ignored:
00549 case RequestMode::Window:
00550 default:
00551
00552 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00553 break;
00554 }
00555 getDataBufferStats();
00556 }
00557
00558 {
00559 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00560 if (dataBuffer_.size() > 0)
00561 {
00562 dataCondition_.notify_all();
00563 }
00564 }
00565 if (!data || force_stop_)
00566 {
00567 TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread" ;
00568 data_thread_running_ = false;
00569 if (requestReceiver_) requestReceiver_->ClearRequests();
00570 dataBuffer_.clear();
00571 newDataBuffer_.clear();
00572 return;
00573 }
00574 }
00575 }
00576
00577 bool artdaq::CommandableFragmentGenerator::waitForDataBufferReady()
00578 {
00579 auto startwait = std::chrono::steady_clock::now();
00580 auto first = true;
00581 auto lastwaittime = 0ULL;
00582 while (dataBufferIsTooLarge())
00583 {
00584 if (should_stop())
00585 {
00586 TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!" ;
00587 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00588 getDataBufferStats();
00589 dataCondition_.notify_all();
00590 data_thread_running_ = false;
00591 return false;
00592 }
00593 auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
00594
00595 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00596 {
00597 TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
00598 << "(seq_id=" << ev_counter()
00599 << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
00600 << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" ;
00601 TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" ;
00602 first = false;
00603 }
00604 if (waittime % 5 && waittime != lastwaittime)
00605 {
00606 TLOG(13) << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" ;
00607 }
00608 lastwaittime = waittime;
00609 usleep(1000);
00610 }
00611 return true;
00612 }
00613
00614 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00615 {
00616 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00617 }
00618
00619 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00620 {
00622 dataBufferDepthFragments_ = dataBuffer_.size();
00623 size_t acc = 0;
00624 TLOG(15) << "getDataBufferStats: Calculating buffer size" ;
00625 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00626 {
00627 if (i->get() != nullptr)
00628 {
00629 acc += (*i)->sizeBytes();
00630 }
00631 }
00632 dataBufferDepthBytes_ = acc;
00633
00634 if (metricMan)
00635 {
00636 TLOG(15) << "getDataBufferStats: Sending Metrics" ;
00637 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00638 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00639 }
00640 TLOG(15) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00641 << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) ;
00642 }
00643
00644 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00645 {
00646 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00647 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00648 if (dataBufferDepthFragments_ > 0)
00649 {
00650 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00651 {
00652
00653 while (dataBufferIsTooLarge())
00654 {
00655 dataBuffer_.erase(dataBuffer_.begin());
00656 getDataBufferStats();
00657 }
00658 if (dataBuffer_.size() > 0)
00659 {
00660 TLOG(17) << "Determining if Fragments can be dropped from data buffer" ;
00661 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00662 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00663 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00664 {
00665 if ((*it)->timestamp() < min)
00666 {
00667 it = dataBuffer_.erase(it);
00668 }
00669 else
00670 {
00671 ++it;
00672 }
00673 }
00674 getDataBufferStats();
00675 }
00676 }
00677 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00678 {
00679
00680 while (dataBuffer_.size() > fragment_ids_.size())
00681 {
00682 dataBuffer_.erase(dataBuffer_.begin());
00683 }
00684 }
00685 }
00686 }
00687
00688 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00689 {
00690 while (!force_stop_)
00691 {
00692 if (should_stop() || monitoringInterval_ <= 0)
00693 {
00694 TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00695 << " and monitoringInterval is " << monitoringInterval_ << ", returning" ;
00696 return;
00697 }
00698 TLOG(12) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" ;
00699
00700 auto now = std::chrono::steady_clock::now();
00701 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00702 {
00703 isHardwareOK_ = checkHWStatus_();
00704 TLOG(12) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
00705 lastMonitoringCall_ = now;
00706 }
00707 usleep(monitoringInterval_ / 10);
00708 }
00709 }
00710
00711 void artdaq::CommandableFragmentGenerator::applyRequestsIgnoredMode(artdaq::FragmentPtrs& frags)
00712 {
00713
00714 TLOG(9) << "Mode is Ignored; Copying data to output" ;
00715 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00716 dataBuffer_.clear();
00717 }
00718
00719 void artdaq::CommandableFragmentGenerator::applyRequestsSingleMode(artdaq::FragmentPtrs& frags)
00720 {
00721
00722 auto requests = requestReceiver_->GetRequests();
00723 while (requests.size() > 1) {
00724
00725 requestReceiver_->RemoveRequest(requests.begin()->first);
00726 requests.erase(requests.begin());
00727 }
00728 sendEmptyFragments(frags, requests);
00729
00730
00731 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00732
00733 if (dataBuffer_.size() > 0)
00734 {
00735 TLOG(9) << "Mode is Single; Sending copy of last event" ;
00736 for (auto& fragptr : dataBuffer_)
00737 {
00738
00739 auto frag = fragptr.get();
00740 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00741 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00742 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00743 newfrag->setTimestamp(requests[ev_counter()]);
00744 newfrag->setSequenceID(ev_counter());
00745 frags.push_back(std::move(newfrag));
00746 }
00747 }
00748 else
00749 {
00750 sendEmptyFragment(frags, ev_counter(), "No data for");
00751 }
00752 requestReceiver_->RemoveRequest(ev_counter());
00753 ev_counter_inc(1, true);
00754 }
00755
00756 void artdaq::CommandableFragmentGenerator::applyRequestsBufferMode(artdaq::FragmentPtrs& frags)
00757 {
00758
00759 auto requests = requestReceiver_->GetRequests();
00760 while (requests.size() > 1) {
00761
00762 requestReceiver_->RemoveRequest(requests.begin()->first);
00763 requests.erase(requests.begin());
00764 }
00765 sendEmptyFragments(frags, requests);
00766
00767
00768 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00769
00770 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments" ;
00771 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00772 frags.back()->setTimestamp(requests[ev_counter()]);
00773 ContainerFragmentLoader cfl(*frags.back());
00774 cfl.set_missing_data(false);
00775
00776
00777
00778 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00779 {
00780 TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
00781 cfl.addFragment(*it);
00782 it = dataBuffer_.erase(it);
00783 }
00784 requestReceiver_->RemoveRequest(ev_counter());
00785 ev_counter_inc(1, true);
00786 }
00787
00788 void artdaq::CommandableFragmentGenerator::applyRequestsWindowMode(artdaq::FragmentPtrs& frags)
00789 {
00790 TLOG(10) << "applyRequestsWindowMode BEGIN";
00791 if (!last_window_send_time_set_)
00792 {
00793 last_window_send_time_ = std::chrono::steady_clock::now();
00794 last_window_send_time_set_ = true;
00795 }
00796
00797 auto requests = requestReceiver_->GetRequests();
00798 bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
00799 [this](decltype(requests)::value_type& request) {
00800 return request.first == ev_counter(); });
00801
00802 if (missing_request_)
00803 {
00804 if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
00805 {
00806 TLOG(TLVL_ERROR) << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
00807 << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
00808 << " Sending Empty Fragments for missing requests!" ;
00809 sendEmptyFragments(frags, requests);
00810
00811 missing_request_ = false;
00812 missing_request_time_ = decltype(missing_request_time_)::max();
00813 }
00814 else if (now_have_desired_request) {
00815 missing_request_ = false;
00816 missing_request_time_ = decltype(missing_request_time_)::max();
00817 }
00818 }
00819
00820 TLOG(10) << "applyRequestsWindowMode: Starting request processing";
00821 for (auto req = requests.begin(); req != requests.end();)
00822 {
00823 TLOG(10, "CommandableFragmentGenerator") << "applyRequestsWindowMode: processing request with sequence ID " << \
00824 req->first << ", timestamp " << req->second;
00825
00826
00827 while (req->first < ev_counter() && requests.size() > 0)
00828 {
00829 TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
00830 requestReceiver_->RemoveRequest(req->first);
00831 req = requests.erase(req);
00832 }
00833 if (requests.size() == 0) break;
00834 if (req->first > ev_counter())
00835 {
00836 if (!missing_request_)
00837 {
00838 missing_request_ = true;
00839 missing_request_time_ = std::chrono::steady_clock::now();
00840 }
00841 }
00842 auto ts = req->second;
00843 TLOG(9) << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) ;
00844 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00845 Fragment::timestamp_t max = min + windowWidth_;
00846 TLOG(9) << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
00847 << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" ;
00848 bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
00849 bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
00850 if (windowTimeout)
00851 {
00852 TLOG(TLVL_WARNING) << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
00853 << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
00854 << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
00855 << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
00856 << "(> " << std::to_string(window_close_timeout_us_) << " us)." ;
00857
00858 if (missing_request_) {
00859 TLOG(TLVL_ERROR) << "A Window timeout has occurred while there are pending requests. Sending empties." ;
00860 sendEmptyFragments(frags, requests);
00861 }
00862 }
00863 if (windowClosed || !data_thread_running_ || windowTimeout)
00864 {
00865 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered or Window-requested Fragments" ;
00866 frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
00867 frags.back()->setTimestamp(ts);
00868 ContainerFragmentLoader cfl(*frags.back());
00869
00870 if (!windowClosed) cfl.set_missing_data(true);
00871 if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
00872 {
00873 TLOG(TLVL_DEBUG) << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" ;
00874 cfl.set_missing_data(true);
00875 }
00876
00877
00878
00879 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00880 {
00881 Fragment::timestamp_t fragT = (*it)->timestamp();
00882 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00883 {
00884 ++it;
00885 continue;
00886 }
00887
00888 TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
00889 cfl.addFragment(*it);
00890
00891 if (uniqueWindows_)
00892 {
00893 it = dataBuffer_.erase(it);
00894 }
00895 else
00896 {
00897 ++it;
00898 }
00899 }
00900 if (req->first == ev_counter())
00901 {
00902 ev_counter_inc(1, true);
00903 while (windows_sent_ooo_.count(ev_counter()))
00904 {
00905 TLOG(9) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
00906 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
00907 ev_counter_inc(1, true);
00908 }
00909 }
00910 else
00911 {
00912 windows_sent_ooo_.insert(req->first);
00913 }
00914 requestReceiver_->RemoveRequest(req->first);
00915 req = requests.erase(req);
00916 last_window_send_time_ = std::chrono::steady_clock::now();
00917 }
00918 else
00919 {
00920 ++req;
00921 }
00922 }
00923 }
00924
00925 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
00926 {
00927 if (check_stop() || exception())
00928 {
00929 return false;
00930 }
00931
00932
00933 if (mode_ == RequestMode::Ignored)
00934 {
00935 while (dataBufferDepthFragments_ <= 0)
00936 {
00937 if (check_stop() || exception() || !isHardwareOK_) return false;
00938 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00939 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
00940 }
00941 }
00942 else
00943 {
00944 if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
00945 checkDataBuffer();
00946
00947
00948 auto counter = 0;
00949
00950 while (requestReceiver_->size() == 0 && counter < 100)
00951 {
00952 if (check_stop() || exception()) return false;
00953
00954 checkDataBuffer();
00955
00956 requestReceiver_->WaitForRequests(10);
00957 counter++;
00958 }
00959 }
00960
00961 {
00962 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
00963
00964 switch (mode_)
00965 {
00966 case RequestMode::Single:
00967 applyRequestsSingleMode(frags);
00968 break;
00969 case RequestMode::Window:
00970 applyRequestsWindowMode(frags);
00971 break;
00972 case RequestMode::Buffer:
00973 applyRequestsBufferMode(frags);
00974 break;
00975 case RequestMode::Ignored:
00976 default:
00977 applyRequestsIgnoredMode(frags);
00978 break;
00979 }
00980
00981 getDataBufferStats();
00982 }
00983
00984 if (frags.size() > 0)
00985 TLOG(9) << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." ;
00986 return true;
00987 }
00988
00989 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
00990 {
00991 TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment" ;
00992 for (auto fid : fragment_ids_)
00993 {
00994 auto frag = new Fragment();
00995 frag->setSequenceID(seqId);
00996 frag->setFragmentID(fid);
00997 frag->setSystemType(Fragment::EmptyFragmentType);
00998 frags.emplace_back(FragmentPtr(frag));
00999 }
01000 return true;
01001 }
01002
01003 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
01004 {
01005 if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
01006
01007 if (requests.size() > 0) {
01008 TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first ;
01009 while (requests.begin()->first > ev_counter())
01010 {
01011 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01012 ev_counter_inc(1, true);
01013 }
01014 }
01015 else if (windows_sent_ooo_.size() > 0)
01016 {
01017 TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() ;
01018 while (*windows_sent_ooo_.begin() > ev_counter())
01019 {
01020 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01021 ev_counter_inc(1, true);
01022 }
01023 }
01024 while (windows_sent_ooo_.count(ev_counter()))
01025 {
01026 TLOG(19) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
01027 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
01028 ev_counter_inc(1, true);
01029 }
01030 }