00001 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
00002 #include "artdaq/DAQdata/Globals.hh"
00003
00004 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00005
00006 #include <boost/exception/all.hpp>
00007 #include <boost/throw_exception.hpp>
00008
00009 #include <limits>
00010 #include <iterator>
00011
00012 #include "canvas/Utilities/Exception.h"
00013 #include "cetlib_except/exception.h"
00014 #include "fhiclcpp/ParameterSet.h"
00015
00016 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00017 #include "artdaq-core/Data/Fragment.hh"
00018 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00019 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00020 #include "artdaq-core/Utilities/TimeUtils.hh"
00021
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <iterator>
00025 #include <iostream>
00026 #include <iomanip>
00027 #include <algorithm>
00028 #include <sys/poll.h>
00029 #include "artdaq/DAQdata/TCPConnect.hh"
00030
00031 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00032 : mutex_()
00033 , requestReceiver_(new RequestReceiver())
00034 , windowOffset_(0)
00035 , windowWidth_(0)
00036 , staleTimeout_(Fragment::InvalidTimestamp)
00037 , expectedType_(Fragment::EmptyFragmentType)
00038 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00039 , uniqueWindows_(true)
00040 , missing_request_(true)
00041 , missing_request_time_()
00042 , last_window_send_time_()
00043 , last_window_send_time_set_(false)
00044 , windows_sent_ooo_()
00045 , missing_request_window_timeout_us_(1000000)
00046 , window_close_timeout_us_(2000000)
00047 , useDataThread_(false)
00048 , sleep_on_no_data_us_(0)
00049 , data_thread_running_(false)
00050 , dataBufferDepthFragments_(0)
00051 , dataBufferDepthBytes_(0)
00052 , maxDataBufferDepthFragments_(1000)
00053 , maxDataBufferDepthBytes_(1000)
00054 , useMonitoringThread_(false)
00055 , monitoringInterval_(0)
00056 , lastMonitoringCall_()
00057 , isHardwareOK_(true)
00058 , dataBuffer_()
00059 , newDataBuffer_()
00060 , run_number_(-1)
00061 , subrun_number_(-1)
00062 , timeout_(std::numeric_limits<uint64_t>::max())
00063 , timestamp_(std::numeric_limits<uint64_t>::max())
00064 , should_stop_(false)
00065 , exception_(false)
00066 , force_stop_(false)
00067 , latest_exception_report_("none")
00068 , ev_counter_(1)
00069 , board_id_(-1)
00070 , instance_name_for_metrics_("FragmentGenerator")
00071 , sleep_on_stop_us_(0)
00072 {}
00073
00074 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00075 : mutex_()
00076 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00077 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00078 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00079 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00080 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00081 , missing_request_(false)
00082 , missing_request_time_(decltype(missing_request_time_)::max())
00083 , last_window_send_time_(decltype(last_window_send_time_)::max())
00084 , last_window_send_time_set_(false)
00085 , windows_sent_ooo_()
00086 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
00087 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00088 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00089 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00090 , data_thread_running_(false)
00091 , dataBufferDepthFragments_(0)
00092 , dataBufferDepthBytes_(0)
00093 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00094 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00095 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00096 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00097 , lastMonitoringCall_()
00098 , isHardwareOK_(true)
00099 , dataBuffer_()
00100 , newDataBuffer_()
00101 , run_number_(-1)
00102 , subrun_number_(-1)
00103 , timeout_(std::numeric_limits<uint64_t>::max())
00104 , timestamp_(std::numeric_limits<uint64_t>::max())
00105 , should_stop_(false)
00106 , exception_(false)
00107 , force_stop_(false)
00108 , latest_exception_report_("none")
00109 , ev_counter_(1)
00110 , board_id_(-1)
00111 , sleep_on_stop_us_(0)
00112 {
00113 board_id_ = ps.get<int>("board_id");
00114 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00115
00116 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00117
00118 TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" ;
00119 int fragment_id = ps.get<int>("fragment_id", -99);
00120
00121 if (fragment_id != -99)
00122 {
00123 if (fragment_ids_.size() != 0)
00124 {
00125 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00126 throw cet::exception(latest_exception_report_);
00127 }
00128 else
00129 {
00130 fragment_ids_.emplace_back(fragment_id);
00131 }
00132 }
00133
00134 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00135
00136 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00137 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00138
00139 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00140 if (modeString == "single" || modeString == "Single")
00141 {
00142 mode_ = RequestMode::Single;
00143 }
00144 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00145 {
00146 mode_ = RequestMode::Buffer;
00147 }
00148 else if (modeString == "window" || modeString == "Window")
00149 {
00150 mode_ = RequestMode::Window;
00151 }
00152 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00153 {
00154 mode_ = RequestMode::Ignored;
00155 }
00156 TLOG(TLVL_DEBUG) << "Request mode is " << printMode_() ;
00157
00158 if (mode_ != RequestMode::Ignored)
00159 {
00160 if (!useDataThread_)
00161 {
00162 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00163 throw cet::exception(latest_exception_report_);
00164 }
00165 requestReceiver_.reset(new RequestReceiver(ps));
00166 }
00167 }
00168
00169 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00170 {
00171 joinThreads();
00172 }
00173
00174 void artdaq::CommandableFragmentGenerator::joinThreads()
00175 {
00176 should_stop_ = true;
00177 force_stop_ = true;
00178 TLOG(TLVL_DEBUG) << "Joining dataThread" ;
00179 if (dataThread_.joinable()) dataThread_.join();
00180 TLOG(TLVL_DEBUG) << "Joining monitoringThread" ;
00181 if (monitoringThread_.joinable()) monitoringThread_.join();
00182 requestReceiver_.reset(nullptr);
00183 }
00184
00185 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00186 {
00187 bool result = true;
00188
00189 if (check_stop()) usleep(sleep_on_stop_us_);
00190 if (exception() || force_stop_) return false;
00191
00192 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00193 {
00194 TLOG(10) << "getNext: Checking whether to collect Monitoring Data" ;
00195 auto now = std::chrono::steady_clock::now();
00196
00197 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00198 {
00199 TLOG(10) << "getNext: Collecting Monitoring Data" ;
00200 isHardwareOK_ = checkHWStatus_();
00201 TLOG(10) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
00202 lastMonitoringCall_ = now;
00203 }
00204 }
00205
00206 try
00207 {
00208 std::lock_guard<std::mutex> lk(mutex_);
00209 if (useDataThread_)
00210 {
00211 TLOG(TLVL_TRACE) << "getNext: Calling applyRequests" ;
00212 result = applyRequests(output);
00213 TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
00214
00215 if (exception())
00216 {
00217 TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00218 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00219 }
00220 }
00221 else
00222 {
00223 if (!isHardwareOK_)
00224 {
00225 TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!" ;
00226 return false;
00227 }
00228 TLOG(TLVL_TRACE) << "getNext: Calling getNext_ " << std::to_string(ev_counter()) ;
00229 try
00230 {
00231 result = getNext_(output);
00232 }
00233 catch (...)
00234 {
00235 throw;
00236 }
00237 TLOG(TLVL_TRACE) << "getNext: Done with getNext_ " << std::to_string(ev_counter()) ;
00238 for (auto dataIter = output.begin(); dataIter != output.end(); ++dataIter)
00239 {
00240 TLOG(20) << "getNext: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
00241 << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00242 }
00243 }
00244 }
00245 catch (const cet::exception& e)
00246 {
00247 latest_exception_report_ = "cet::exception caught in getNext(): ";
00248 latest_exception_report_.append(e.what());
00249 TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e ;
00250 set_exception(true);
00251 return false;
00252 }
00253 catch (const boost::exception& e)
00254 {
00255 latest_exception_report_ = "boost::exception caught in getNext(): ";
00256 latest_exception_report_.append(boost::diagnostic_information(e));
00257 TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e) ;
00258 set_exception(true);
00259 return false;
00260 }
00261 catch (const std::exception& e)
00262 {
00263 latest_exception_report_ = "std::exception caught in getNext(): ";
00264 latest_exception_report_.append(e.what());
00265 TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what() ;
00266 set_exception(true);
00267 return false;
00268 }
00269 catch (...)
00270 {
00271 latest_exception_report_ = "Unknown exception caught in getNext().";
00272 TLOG(TLVL_ERROR) << "getNext: unknown exception caught" ;
00273 set_exception(true);
00274 return false;
00275 }
00276
00277 if (!result)
00278 {
00279 TLOG(TLVL_DEBUG) << "stopped " ;
00280 }
00281
00282 if (metricMan && !output.empty()) {
00283
00284 auto timestamp = output.front()->timestamp();
00285
00286 if (output.size() > 1) {
00287 for (auto& outputfrag : output ) {
00288 if (outputfrag->timestamp() > timestamp) {
00289 timestamp = outputfrag->timestamp();
00290 }
00291 }
00292 }
00293
00294 metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1,
00295 MetricMode::LastPoint, app_name);
00296 }
00297
00298 return result;
00299 }
00300
00301 bool artdaq::CommandableFragmentGenerator::check_stop()
00302 {
00303 TLOG(14) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception()) ;
00304
00305 if (!should_stop()) return false;
00306 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00307 if (force_stop_) return true;
00308
00309
00310 return !requestReceiver_->isRunning();
00311 }
00312
00313 int artdaq::CommandableFragmentGenerator::fragment_id() const
00314 {
00315 if (fragment_ids_.size() != 1)
00316 {
00317 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00318 }
00319 else
00320 {
00321 return fragment_ids_[0];
00322 }
00323 }
00324
00325 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00326 {
00327 if (force || mode_ == RequestMode::Ignored)
00328 {
00329 return ev_counter_.fetch_add(step);
00330 }
00331 return ev_counter_.load();
00332 }
00333
00334 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00335 {
00336 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00337
00338 timeout_ = timeout;
00339 timestamp_ = timestamp;
00340 ev_counter_.store(1);
00341 missing_request_ = false;
00342 should_stop_.store(false);
00343 exception_.store(false);
00344 run_number_ = run;
00345 subrun_number_ = 1;
00346 latest_exception_report_ = "none";
00347 dataBuffer_.clear();
00348 last_window_send_time_set_ = false;
00349 windows_sent_ooo_.clear();
00350
00351 start();
00352
00353 std::unique_lock<std::mutex> lk(mutex_);
00354 if (useDataThread_) startDataThread();
00355 if (useMonitoringThread_) startMonitoringThread();
00356 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00357 }
00358
00359 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00360 {
00361 TLOG(TLVL_DEBUG) << "Stop Command received." ;
00362
00363 timeout_ = timeout;
00364 timestamp_ = timestamp;
00365 if (requestReceiver_ && requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
00366
00367 stopNoMutex();
00368 should_stop_.store(true);
00369 std::unique_lock<std::mutex> lk(mutex_);
00370 stop();
00371 TLOG(TLVL_DEBUG) << "Stop command complete.";
00372 }
00373
00374 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00375 {
00376 timeout_ = timeout;
00377 timestamp_ = timestamp;
00378 if (requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
00379
00380 pauseNoMutex();
00381 should_stop_.store(true);
00382 std::unique_lock<std::mutex> lk(mutex_);
00383
00384 pause();
00385 }
00386
00387 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00388 {
00389 timeout_ = timeout;
00390 timestamp_ = timestamp;
00391
00392 subrun_number_ += 1;
00393 should_stop_ = false;
00394
00395 dataBuffer_.clear();
00396
00397
00398 resume();
00399
00400 std::unique_lock<std::mutex> lk(mutex_);
00401 if (useDataThread_) startDataThread();
00402 if (useMonitoringThread_) startMonitoringThread();
00403 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00404 }
00405
00406 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00407 {
00408 std::lock_guard<std::mutex> lk(mutex_);
00409
00410
00411
00412
00413
00414
00415 std::string childReport = reportSpecific(which);
00416 if (childReport.length() > 0) { return childReport; }
00417
00418
00419 if (which == "latest_exception")
00420 {
00421 return latest_exception_report_;
00422 }
00423
00424
00425 childReport = report();
00426 if (childReport.length() > 0) { return childReport; }
00427
00428
00429 std::string tmpString = "The \"" + which + "\" command is not ";
00430 tmpString.append("currently supported by the ");
00431 tmpString.append(metricsReportingInstanceName());
00432 tmpString.append(" fragment generator.");
00433 return tmpString;
00434 }
00435
00436
00437 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00438 {
00439 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00440 }
00441
00442 void artdaq::CommandableFragmentGenerator::pause()
00443 {
00444 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00445 }
00446
00447 void artdaq::CommandableFragmentGenerator::resume()
00448 {
00449 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00450 }
00451
00452 std::string artdaq::CommandableFragmentGenerator::report()
00453 {
00454 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00455 return "";
00456 }
00457
00458 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00459 {
00460 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00461 return "";
00462 }
00463
00464 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00465 {
00466 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00467 return true;
00468 }
00469
00470 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
00471 {
00472 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
00473 return true;
00474 }
00475
00476 void artdaq::CommandableFragmentGenerator::startDataThread()
00477 {
00478 if (dataThread_.joinable()) dataThread_.join();
00479 TLOG(TLVL_INFO) << "Starting Data Receiver Thread" ;
00480 dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
00481 }
00482
00483 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00484 {
00485 if (monitoringThread_.joinable()) monitoringThread_.join();
00486 TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread" ;
00487 monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00488 }
00489
00490 std::string artdaq::CommandableFragmentGenerator::printMode_()
00491 {
00492 switch (mode_)
00493 {
00494 case RequestMode::Single:
00495 return "Single";
00496 case RequestMode::Buffer:
00497 return "Buffer";
00498 case RequestMode::Window:
00499 return "Window";
00500 case RequestMode::Ignored:
00501 return "Ignored";
00502 }
00503
00504 return "ERROR";
00505 }
00506
00507 void artdaq::CommandableFragmentGenerator::getDataLoop()
00508 {
00509 data_thread_running_ = true;
00510 while (!force_stop_)
00511 {
00512 if (!isHardwareOK_)
00513 {
00514 TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" ;
00515 data_thread_running_ = false;
00516 return;
00517 }
00518
00519 TLOG(13) << "getDataLoop: calling getNext_" ;
00520
00521 bool data = false;
00522 auto startdata = std::chrono::steady_clock::now();
00523
00524 try
00525 {
00526 data = getNext_(newDataBuffer_);
00527 }
00528 catch (...)
00529 {
00530 ExceptionHandler(ExceptionHandlerRethrow::no,
00531 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00532 set_exception(true);
00533
00534 data_thread_running_ = false;
00535 return;
00536 }
00537 for (auto dataIter = newDataBuffer_.begin(); dataIter != newDataBuffer_.end(); ++dataIter)
00538 {
00539 TLOG(20) << "getDataLoop: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
00540 << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00541 }
00542
00543 if (metricMan)
00544 {
00545 metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
00546 }
00547
00548 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00549 {
00550 usleep(sleep_on_no_data_us_);
00551 }
00552
00553 TLOG(15) << "Waiting for data buffer ready" ;
00554 if (!waitForDataBufferReady()) return;
00555 TLOG(15) << "Done waiting for data buffer ready" ;
00556
00557 TLOG(13) << "getDataLoop: processing data" ;
00558 if (data && !force_stop_)
00559 {
00560 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00561 switch (mode_)
00562 {
00563 case RequestMode::Single:
00564
00565 while (newDataBuffer_.size() >= fragment_ids_.size())
00566 {
00567 dataBuffer_.clear();
00568 auto it = newDataBuffer_.begin();
00569 std::advance(it, fragment_ids_.size());
00570 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00571 }
00572 break;
00573 case RequestMode::Buffer:
00574 case RequestMode::Ignored:
00575 case RequestMode::Window:
00576 default:
00577
00578 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00579 break;
00580 }
00581 getDataBufferStats();
00582 }
00583
00584 {
00585 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00586 if (dataBuffer_.size() > 0)
00587 {
00588 dataCondition_.notify_all();
00589 }
00590 }
00591 if (!data || force_stop_)
00592 {
00593 TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread" ;
00594 data_thread_running_ = false;
00595 if (requestReceiver_) requestReceiver_->ClearRequests();
00596 dataBuffer_.clear();
00597 newDataBuffer_.clear();
00598 return;
00599 }
00600 }
00601 }
00602
00603 bool artdaq::CommandableFragmentGenerator::waitForDataBufferReady()
00604 {
00605 auto startwait = std::chrono::steady_clock::now();
00606 auto first = true;
00607 auto lastwaittime = 0ULL;
00608 while (dataBufferIsTooLarge())
00609 {
00610 if (should_stop())
00611 {
00612 TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!" ;
00613 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00614 getDataBufferStats();
00615 dataCondition_.notify_all();
00616 data_thread_running_ = false;
00617 return false;
00618 }
00619 auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
00620
00621 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00622 {
00623 TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
00624 << "(seq_id=" << ev_counter()
00625 << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
00626 << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")" ;
00627 TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues" ;
00628 first = false;
00629 }
00630 if (waittime % 5 && waittime != lastwaittime)
00631 {
00632 TLOG(13) << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" ;
00633 }
00634 lastwaittime = waittime;
00635 usleep(1000);
00636 }
00637 return true;
00638 }
00639
00640 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00641 {
00642 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00643 }
00644
00645 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00646 {
00648 dataBufferDepthFragments_ = dataBuffer_.size();
00649 size_t acc = 0;
00650 TLOG(15) << "getDataBufferStats: Calculating buffer size" ;
00651 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00652 {
00653 if (i->get() != nullptr)
00654 {
00655 acc += (*i)->sizeBytes();
00656 }
00657 }
00658 dataBufferDepthBytes_ = acc;
00659
00660 if (metricMan)
00661 {
00662 TLOG(15) << "getDataBufferStats: Sending Metrics" ;
00663 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00664 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00665 }
00666 TLOG(15) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00667 << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) ;
00668 }
00669
00670 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00671 {
00672 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00673 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00674 if (dataBufferDepthFragments_ > 0)
00675 {
00676 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00677 {
00678
00679 while (dataBufferIsTooLarge())
00680 {
00681 dataBuffer_.erase(dataBuffer_.begin());
00682 getDataBufferStats();
00683 }
00684 if (dataBuffer_.size() > 0)
00685 {
00686 TLOG(17) << "Determining if Fragments can be dropped from data buffer" ;
00687 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00688 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00689 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00690 {
00691 if ((*it)->timestamp() < min)
00692 {
00693 it = dataBuffer_.erase(it);
00694 }
00695 else
00696 {
00697 ++it;
00698 }
00699 }
00700 getDataBufferStats();
00701 }
00702 }
00703 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00704 {
00705
00706 while (dataBuffer_.size() > fragment_ids_.size())
00707 {
00708 dataBuffer_.erase(dataBuffer_.begin());
00709 }
00710 }
00711 }
00712 }
00713
00714 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00715 {
00716 while (!force_stop_)
00717 {
00718 if (should_stop() || monitoringInterval_ <= 0)
00719 {
00720 TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00721 << " and monitoringInterval is " << monitoringInterval_ << ", returning" ;
00722 return;
00723 }
00724 TLOG(12) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" ;
00725
00726 auto now = std::chrono::steady_clock::now();
00727 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00728 {
00729 isHardwareOK_ = checkHWStatus_();
00730 TLOG(12) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ ;
00731 lastMonitoringCall_ = now;
00732 }
00733 usleep(monitoringInterval_ / 10);
00734 }
00735 }
00736
00737 void artdaq::CommandableFragmentGenerator::applyRequestsIgnoredMode(artdaq::FragmentPtrs& frags)
00738 {
00739
00740 TLOG(9) << "Mode is Ignored; Copying data to output" ;
00741 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00742 dataBuffer_.clear();
00743 }
00744
00745 void artdaq::CommandableFragmentGenerator::applyRequestsSingleMode(artdaq::FragmentPtrs& frags)
00746 {
00747
00748 auto requests = requestReceiver_->GetRequests();
00749 while (requests.size() > 1) {
00750
00751 requestReceiver_->RemoveRequest(requests.begin()->first);
00752 requests.erase(requests.begin());
00753 }
00754 sendEmptyFragments(frags, requests);
00755
00756
00757 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00758
00759 if (dataBuffer_.size() > 0)
00760 {
00761 TLOG(9) << "Mode is Single; Sending copy of last event" ;
00762 for (auto& fragptr : dataBuffer_)
00763 {
00764
00765 auto frag = fragptr.get();
00766 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00767 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00768 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00769 newfrag->setTimestamp(requests[ev_counter()]);
00770 newfrag->setSequenceID(ev_counter());
00771 frags.push_back(std::move(newfrag));
00772 }
00773 }
00774 else
00775 {
00776 sendEmptyFragment(frags, ev_counter(), "No data for");
00777 }
00778 requestReceiver_->RemoveRequest(ev_counter());
00779 ev_counter_inc(1, true);
00780 }
00781
00782 void artdaq::CommandableFragmentGenerator::applyRequestsBufferMode(artdaq::FragmentPtrs& frags)
00783 {
00784
00785 auto requests = requestReceiver_->GetRequests();
00786 while (requests.size() > 1) {
00787
00788 requestReceiver_->RemoveRequest(requests.begin()->first);
00789 requests.erase(requests.begin());
00790 }
00791 sendEmptyFragments(frags, requests);
00792
00793
00794 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00795
00796 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments" ;
00797 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00798 frags.back()->setTimestamp(requests[ev_counter()]);
00799 ContainerFragmentLoader cfl(*frags.back());
00800 cfl.set_missing_data(false);
00801
00802
00803
00804 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00805 {
00806 TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
00807 cfl.addFragment(*it);
00808 it = dataBuffer_.erase(it);
00809 }
00810 requestReceiver_->RemoveRequest(ev_counter());
00811 ev_counter_inc(1, true);
00812 }
00813
00814 void artdaq::CommandableFragmentGenerator::applyRequestsWindowMode(artdaq::FragmentPtrs& frags)
00815 {
00816 TLOG(10) << "applyRequestsWindowMode BEGIN";
00817 if (!last_window_send_time_set_)
00818 {
00819 last_window_send_time_ = std::chrono::steady_clock::now();
00820 last_window_send_time_set_ = true;
00821 }
00822
00823 auto requests = requestReceiver_->GetRequests();
00824 bool now_have_desired_request = std::any_of(requests.begin(), requests.end(),
00825 [this](decltype(requests)::value_type& request) {
00826 return request.first == ev_counter(); });
00827
00828 if (missing_request_)
00829 {
00830 if (!now_have_desired_request && TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) > missing_request_window_timeout_us_)
00831 {
00832 TLOG(TLVL_ERROR) << "Data-taking has paused for " << TimeUtils::GetElapsedTimeMicroseconds(missing_request_time_) << " us "
00833 << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages."
00834 << " Sending Empty Fragments for missing requests!" ;
00835 sendEmptyFragments(frags, requests);
00836
00837 missing_request_ = false;
00838 missing_request_time_ = decltype(missing_request_time_)::max();
00839 }
00840 else if (now_have_desired_request) {
00841 missing_request_ = false;
00842 missing_request_time_ = decltype(missing_request_time_)::max();
00843 }
00844 }
00845
00846 TLOG(10) << "applyRequestsWindowMode: Starting request processing";
00847 for (auto req = requests.begin(); req != requests.end();)
00848 {
00849 TLOG(10, "CommandableFragmentGenerator") << "applyRequestsWindowMode: processing request with sequence ID " << \
00850 req->first << ", timestamp " << req->second;
00851
00852
00853 while (req->first < ev_counter() && requests.size() > 0)
00854 {
00855 TLOG(10) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
00856 requestReceiver_->RemoveRequest(req->first);
00857 req = requests.erase(req);
00858 }
00859 if (requests.size() == 0) break;
00860 if (req->first > ev_counter())
00861 {
00862 if (!missing_request_)
00863 {
00864 missing_request_ = true;
00865 missing_request_time_ = std::chrono::steady_clock::now();
00866 }
00867 }
00868 auto ts = req->second;
00869 TLOG(9) << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) ;
00870 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00871 Fragment::timestamp_t max = min + windowWidth_;
00872 TLOG(9) << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
00873 << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" ;
00874 bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
00875 bool windowTimeout = TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) > window_close_timeout_us_;
00876 if (windowTimeout)
00877 {
00878 TLOG(TLVL_WARNING) << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max)
00879 << ", buffer=" << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0))
00880 << " (if no buffer in memory, this is shown as a 0)). Time waiting: "
00881 << TimeUtils::GetElapsedTimeMicroseconds(last_window_send_time_) << " us "
00882 << "(> " << std::to_string(window_close_timeout_us_) << " us)." ;
00883
00884 if (missing_request_) {
00885 TLOG(TLVL_ERROR) << "A Window timeout has occurred while there are pending requests. Sending empties." ;
00886 sendEmptyFragments(frags, requests);
00887 }
00888 }
00889 if (windowClosed || !data_thread_running_ || windowTimeout)
00890 {
00891 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered or Window-requested Fragments" ;
00892 frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
00893 frags.back()->setTimestamp(ts);
00894 ContainerFragmentLoader cfl(*frags.back());
00895
00896 if (!windowClosed) cfl.set_missing_data(true);
00897 if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
00898 {
00899 TLOG(TLVL_DEBUG) << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" ;
00900 cfl.set_missing_data(true);
00901 }
00902
00903
00904
00905 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00906 {
00907 Fragment::timestamp_t fragT = (*it)->timestamp();
00908 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00909 {
00910 ++it;
00911 continue;
00912 }
00913
00914 TLOG(9) << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" ;
00915 cfl.addFragment(*it);
00916
00917 if (uniqueWindows_)
00918 {
00919 it = dataBuffer_.erase(it);
00920 }
00921 else
00922 {
00923 ++it;
00924 }
00925 }
00926 if (req->first == ev_counter())
00927 {
00928 ev_counter_inc(1, true);
00929 while (windows_sent_ooo_.count(ev_counter()))
00930 {
00931 TLOG(9) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
00932 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
00933 ev_counter_inc(1, true);
00934 }
00935 }
00936 else
00937 {
00938 windows_sent_ooo_.insert(req->first);
00939 }
00940 requestReceiver_->RemoveRequest(req->first);
00941 req = requests.erase(req);
00942 last_window_send_time_ = std::chrono::steady_clock::now();
00943 }
00944 else
00945 {
00946 ++req;
00947 }
00948 }
00949 }
00950
00951 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
00952 {
00953 if (check_stop() || exception())
00954 {
00955 return false;
00956 }
00957
00958
00959 if (mode_ == RequestMode::Ignored)
00960 {
00961 while (dataBufferDepthFragments_ <= 0)
00962 {
00963 if (check_stop() || exception() || !isHardwareOK_) return false;
00964 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00965 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
00966 }
00967 }
00968 else
00969 {
00970 if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
00971 checkDataBuffer();
00972
00973
00974 auto counter = 0;
00975
00976 while (requestReceiver_->size() == 0 && counter < 100)
00977 {
00978 if (check_stop() || exception()) return false;
00979
00980 checkDataBuffer();
00981
00982 requestReceiver_->WaitForRequests(10);
00983 counter++;
00984 }
00985 }
00986
00987 {
00988 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
00989
00990 switch (mode_)
00991 {
00992 case RequestMode::Single:
00993 applyRequestsSingleMode(frags);
00994 break;
00995 case RequestMode::Window:
00996 applyRequestsWindowMode(frags);
00997 break;
00998 case RequestMode::Buffer:
00999 applyRequestsBufferMode(frags);
01000 break;
01001 case RequestMode::Ignored:
01002 default:
01003 applyRequestsIgnoredMode(frags);
01004 break;
01005 }
01006
01007 getDataBufferStats();
01008 }
01009
01010 if (frags.size() > 0)
01011 TLOG(9) << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." ;
01012 return true;
01013 }
01014
01015 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
01016 {
01017 TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment" ;
01018 for (auto fid : fragment_ids_)
01019 {
01020 auto frag = new Fragment();
01021 frag->setSequenceID(seqId);
01022 frag->setFragmentID(fid);
01023 frag->setSystemType(Fragment::EmptyFragmentType);
01024 frags.emplace_back(FragmentPtr(frag));
01025 }
01026 return true;
01027 }
01028
01029 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
01030 {
01031 if (requests.size() == 0 && windows_sent_ooo_.size() == 0) return;
01032
01033 if (requests.size() > 0) {
01034 TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first ;
01035 while (requests.begin()->first > ev_counter())
01036 {
01037 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01038 ev_counter_inc(1, true);
01039 }
01040 }
01041 else if (windows_sent_ooo_.size() > 0)
01042 {
01043 TLOG(19) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << *windows_sent_ooo_.begin() ;
01044 while (*windows_sent_ooo_.begin() > ev_counter())
01045 {
01046 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01047 ev_counter_inc(1, true);
01048 }
01049 }
01050 while (windows_sent_ooo_.count(ev_counter()))
01051 {
01052 TLOG(19) << "Data-taking has caught up to out-of-order window request " << ev_counter() << ", removing from list" ;
01053 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), windows_sent_ooo_.find(ev_counter()));
01054 ev_counter_inc(1, true);
01055 }
01056 }