00001 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
00002 #include "artdaq/DAQdata/Globals.hh"
00003
00004 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00005
00006 #include <boost/exception/all.hpp>
00007 #include <boost/throw_exception.hpp>
00008
00009 #include <limits>
00010 #include <iterator>
00011
00012 #include "canvas/Utilities/Exception.h"
00013 #include "cetlib_except/exception.h"
00014 #include "fhiclcpp/ParameterSet.h"
00015
00016 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00017 #include "artdaq-core/Data/Fragment.hh"
00018 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00019 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00020 #include "artdaq-core/Utilities/TimeUtils.hh"
00021
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <iterator>
00025 #include <iostream>
00026 #include <iomanip>
00027 #include <algorithm>
00028 #include <sys/poll.h>
00029 #include "artdaq/DAQdata/TCPConnect.hh"
00030
00031 #define TLVL_GETNEXT 10
00032 #define TLVL_GETNEXT_VERBOSE 20
00033 #define TLVL_CHECKSTOP 11
00034 #define TLVL_EVCOUNTERINC 12
00035 #define TLVL_GETDATALOOP 13
00036 #define TLVL_GETDATALOOP_DATABUFFWAIT 21
00037 #define TLVL_GETDATALOOP_VERBOSE 20
00038 #define TLVL_WAITFORBUFFERREADY 15
00039 #define TLVL_GETBUFFERSTATS 16
00040 #define TLVL_CHECKDATABUFFER 17
00041 #define TLVL_GETMONITORINGDATA 18
00042 #define TLVL_APPLYREQUESTS 9
00043 #define TLVL_SENDEMPTYFRAGMENTS 19
00044 #define TLVL_CHECKWINDOWS 14
00045
00046 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00047 : mutex_()
00048 , requestReceiver_(nullptr)
00049 , windowOffset_(0)
00050 , windowWidth_(0)
00051 , staleTimeout_(Fragment::InvalidTimestamp)
00052 , expectedType_(Fragment::EmptyFragmentType)
00053 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00054 , uniqueWindows_(true)
00055 , windows_sent_ooo_()
00056 , missing_request_window_timeout_us_(1000000)
00057 , window_close_timeout_us_(2000000)
00058 , useDataThread_(false)
00059 , circularDataBufferMode_(false)
00060 , sleep_on_no_data_us_(0)
00061 , data_thread_running_(false)
00062 , dataBufferDepthFragments_(0)
00063 , dataBufferDepthBytes_(0)
00064 , maxDataBufferDepthFragments_(1000)
00065 , maxDataBufferDepthBytes_(1000)
00066 , useMonitoringThread_(false)
00067 , monitoringInterval_(0)
00068 , lastMonitoringCall_()
00069 , isHardwareOK_(true)
00070 , dataBuffer_()
00071 , newDataBuffer_()
00072 , run_number_(-1)
00073 , subrun_number_(-1)
00074 , timeout_(std::numeric_limits<uint64_t>::max())
00075 , timestamp_(std::numeric_limits<uint64_t>::max())
00076 , should_stop_(false)
00077 , exception_(false)
00078 , force_stop_(false)
00079 , latest_exception_report_("none")
00080 , ev_counter_(1)
00081 , board_id_(-1)
00082 , instance_name_for_metrics_("FragmentGenerator")
00083 , sleep_on_stop_us_(0)
00084 {}
00085
00086 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00087 : mutex_()
00088 , requestReceiver_(nullptr)
00089 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00090 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00091 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00092 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00093 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00094 , windows_sent_ooo_()
00095 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 5000000))
00096 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00097 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00098 , circularDataBufferMode_(ps.get<bool>("circular_buffer_mode", false))
00099 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00100 , data_thread_running_(false)
00101 , dataBufferDepthFragments_(0)
00102 , dataBufferDepthBytes_(0)
00103 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00104 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00105 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00106 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00107 , lastMonitoringCall_()
00108 , isHardwareOK_(true)
00109 , dataBuffer_()
00110 , newDataBuffer_()
00111 , run_number_(-1)
00112 , subrun_number_(-1)
00113 , timeout_(std::numeric_limits<uint64_t>::max())
00114 , timestamp_(std::numeric_limits<uint64_t>::max())
00115 , should_stop_(false)
00116 , exception_(false)
00117 , force_stop_(false)
00118 , latest_exception_report_("none")
00119 , ev_counter_(1)
00120 , board_id_(-1)
00121 , sleep_on_stop_us_(0)
00122 {
00123 board_id_ = ps.get<int>("board_id");
00124 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00125
00126 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00127
00128 TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)";
00129 int fragment_id = ps.get<int>("fragment_id", -99);
00130
00131 if (fragment_id != -99)
00132 {
00133 if (fragment_ids_.size() != 0)
00134 {
00135 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00136 throw cet::exception(latest_exception_report_);
00137 }
00138 else
00139 {
00140 fragment_ids_.emplace_back(fragment_id);
00141 }
00142 }
00143
00144 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00145
00146 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00147 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00148
00149 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00150 if (modeString == "single" || modeString == "Single")
00151 {
00152 mode_ = RequestMode::Single;
00153 }
00154 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00155 {
00156 mode_ = RequestMode::Buffer;
00157 }
00158 else if (modeString == "window" || modeString == "Window")
00159 {
00160 mode_ = RequestMode::Window;
00161 }
00162 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00163 {
00164 mode_ = RequestMode::Ignored;
00165 }
00166 TLOG(TLVL_DEBUG) << "Request mode is " << printMode_();
00167
00168 if (mode_ != RequestMode::Ignored)
00169 {
00170 if (!useDataThread_)
00171 {
00172 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00173 throw cet::exception(latest_exception_report_);
00174 }
00175 requestReceiver_.reset(new RequestReceiver(ps));
00176 }
00177 }
00178
00179 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00180 {
00181 joinThreads();
00182 requestReceiver_.reset(nullptr);
00183 }
00184
00185 void artdaq::CommandableFragmentGenerator::joinThreads()
00186 {
00187 should_stop_ = true;
00188 force_stop_ = true;
00189 TLOG(TLVL_DEBUG) << "Joining dataThread";
00190 if (dataThread_.joinable()) dataThread_.join();
00191 TLOG(TLVL_DEBUG) << "Joining monitoringThread";
00192 if (monitoringThread_.joinable()) monitoringThread_.join();
00193 TLOG(TLVL_DEBUG) << "joinThreads complete";
00194 }
00195
00196 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00197 {
00198 bool result = true;
00199
00200 if (check_stop()) usleep(sleep_on_stop_us_);
00201 if (exception() || force_stop_) return false;
00202
00203 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00204 {
00205 TLOG(TLVL_GETNEXT) << "getNext: Checking whether to collect Monitoring Data";
00206 auto now = std::chrono::steady_clock::now();
00207
00208 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00209 {
00210 TLOG(TLVL_GETNEXT) << "getNext: Collecting Monitoring Data";
00211 isHardwareOK_ = checkHWStatus_();
00212 TLOG(TLVL_GETNEXT) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
00213 lastMonitoringCall_ = now;
00214 }
00215 }
00216
00217 try
00218 {
00219 std::lock_guard<std::mutex> lk(mutex_);
00220 if (useDataThread_)
00221 {
00222 TLOG(TLVL_TRACE) << "getNext: Calling applyRequests";
00223 result = applyRequests(output);
00224 TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
00225 for (auto dataIter = output.begin(); dataIter != output.end(); ++dataIter)
00226 {
00227 TLOG(20) << "getNext: applyRequests() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
00228 << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00229 }
00230
00231 if (exception())
00232 {
00233 TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00234 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00235 }
00236 }
00237 else
00238 {
00239 if (!isHardwareOK_)
00240 {
00241 TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!";
00242 return false;
00243 }
00244 TLOG(TLVL_TRACE) << "getNext: Calling getNext_ w/ ev_counter()=" << ev_counter();
00245 try
00246 {
00247 result = getNext_(output);
00248 }
00249 catch (...)
00250 {
00251 throw;
00252 }
00253 TLOG(TLVL_TRACE) << "getNext: Done with getNext_ - ev_counter() now " << ev_counter();
00254 for (auto dataIter = output.begin(); dataIter != output.end(); ++dataIter)
00255 {
00256 TLOG(TLVL_GETNEXT_VERBOSE) << "getNext: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
00257 << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00258 }
00259 }
00260 }
00261 catch (const cet::exception& e)
00262 {
00263 latest_exception_report_ = "cet::exception caught in getNext(): ";
00264 latest_exception_report_.append(e.what());
00265 TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e;
00266 set_exception(true);
00267 return false;
00268 }
00269 catch (const boost::exception& e)
00270 {
00271 latest_exception_report_ = "boost::exception caught in getNext(): ";
00272 latest_exception_report_.append(boost::diagnostic_information(e));
00273 TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e);
00274 set_exception(true);
00275 return false;
00276 }
00277 catch (const std::exception& e)
00278 {
00279 latest_exception_report_ = "std::exception caught in getNext(): ";
00280 latest_exception_report_.append(e.what());
00281 TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what();
00282 set_exception(true);
00283 return false;
00284 }
00285 catch (...)
00286 {
00287 latest_exception_report_ = "Unknown exception caught in getNext().";
00288 TLOG(TLVL_ERROR) << "getNext: unknown exception caught";
00289 set_exception(true);
00290 return false;
00291 }
00292
00293 if (!result)
00294 {
00295 TLOG(TLVL_DEBUG) << "getNext: Either getNext_ or applyRequests returned false, stopping";
00296 }
00297
00298 if (metricMan && !output.empty())
00299 {
00300 auto timestamp = output.front()->timestamp();
00301
00302 if (output.size() > 1)
00303 {
00304 for (auto& outputfrag : output)
00305 {
00306 if (outputfrag->timestamp() > timestamp)
00307 {
00308 timestamp = outputfrag->timestamp();
00309 }
00310 }
00311 }
00312
00313 metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1,
00314 MetricMode::LastPoint, app_name);
00315 }
00316
00317 return result;
00318 }
00319
00320 bool artdaq::CommandableFragmentGenerator::check_stop()
00321 {
00322 TLOG(TLVL_CHECKSTOP) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception());
00323
00324 if (!should_stop()) return false;
00325 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00326 if (force_stop_) return true;
00327
00328
00329 TLOG(TLVL_DEBUG) << "should_stop is true, force_stop_ is false, requestReceiver_->isRunning() is " << std::boolalpha << requestReceiver_->isRunning();
00330 return !requestReceiver_->isRunning();
00331 }
00332
00333 int artdaq::CommandableFragmentGenerator::fragment_id() const
00334 {
00335 if (fragment_ids_.size() != 1)
00336 {
00337 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00338 }
00339 else
00340 {
00341 return fragment_ids_[0];
00342 }
00343 }
00344
00345 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00346 {
00347 if (force || mode_ == RequestMode::Ignored)
00348 {
00349 TLOG(TLVL_EVCOUNTERINC) << "ev_counter_inc: Incrementing ev_counter from " << ev_counter() << " by " << step;
00350 return ev_counter_.fetch_add(step);
00351 }
00352 return ev_counter_.load();
00353 }
00354
00355 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00356 {
00357 TLOG(TLVL_TRACE) << "Start Command received.";
00358 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00359
00360 timeout_ = timeout;
00361 timestamp_ = timestamp;
00362 ev_counter_.store(1);
00363 windows_sent_ooo_.clear();
00364 dataBuffer_.clear();
00365 should_stop_.store(false);
00366 force_stop_.store(false);
00367 exception_.store(false);
00368 run_number_ = run;
00369 subrun_number_ = 1;
00370 latest_exception_report_ = "none";
00371
00372 start();
00373
00374 std::unique_lock<std::mutex> lk(mutex_);
00375 if (useDataThread_) startDataThread();
00376 if (useMonitoringThread_) startMonitoringThread();
00377 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00378 TLOG(TLVL_TRACE) << "Start Command complete.";
00379 }
00380
00381 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00382 {
00383 TLOG(TLVL_TRACE) << "Stop Command received.";
00384
00385 timeout_ = timeout;
00386 timestamp_ = timestamp;
00387 if (requestReceiver_ && requestReceiver_->isRunning()) {
00388 TLOG(TLVL_DEBUG) << "Stopping Request receiver thread BEGIN";
00389 requestReceiver_->stopRequestReceiverThread();
00390 TLOG(TLVL_DEBUG) << "Stopping Request receiver thread END";
00391 }
00392
00393 stopNoMutex();
00394 should_stop_.store(true);
00395 std::unique_lock<std::mutex> lk(mutex_);
00396 stop();
00397
00398 joinThreads();
00399 TLOG(TLVL_TRACE) << "Stop Command complete.";
00400 }
00401
00402 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00403 {
00404 TLOG(TLVL_TRACE) << "Pause Command received.";
00405 timeout_ = timeout;
00406 timestamp_ = timestamp;
00407
00408
00409 pauseNoMutex();
00410 should_stop_.store(true);
00411 std::unique_lock<std::mutex> lk(mutex_);
00412
00413 pause();
00414 }
00415
00416 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00417 {
00418 TLOG(TLVL_TRACE) << "Resume Command received.";
00419 timeout_ = timeout;
00420 timestamp_ = timestamp;
00421
00422 subrun_number_ += 1;
00423 should_stop_ = false;
00424 {
00425 std::unique_lock<std::mutex> lk(dataBufferMutex_);
00426 dataBuffer_.clear();
00427 }
00428
00429 resume();
00430
00431 std::unique_lock<std::mutex> lk(mutex_);
00432
00433
00434
00435 TLOG(TLVL_TRACE) << "Resume Command complete.";
00436 }
00437
00438 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00439 {
00440 TLOG(TLVL_TRACE) << "Report Command received.";
00441 std::lock_guard<std::mutex> lk(mutex_);
00442
00443
00444
00445
00446
00447
00448 std::string childReport = reportSpecific(which);
00449 if (childReport.length() > 0) { return childReport; }
00450
00451
00452 if (which == "latest_exception")
00453 {
00454 return latest_exception_report_;
00455 }
00456
00457
00458 childReport = report();
00459 if (childReport.length() > 0) { return childReport; }
00460
00461
00462 std::string tmpString = "The \"" + which + "\" command is not ";
00463 tmpString.append("currently supported by the ");
00464 tmpString.append(metricsReportingInstanceName());
00465 tmpString.append(" fragment generator.");
00466 TLOG(TLVL_TRACE) << "Report Command complete.";
00467 return tmpString;
00468 }
00469
00470
00471 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00472 {
00473 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00474 }
00475
00476 void artdaq::CommandableFragmentGenerator::pause()
00477 {
00478 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00479 }
00480
00481 void artdaq::CommandableFragmentGenerator::resume()
00482 {
00483 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00484 }
00485
00486 std::string artdaq::CommandableFragmentGenerator::report()
00487 {
00488 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00489 return "";
00490 }
00491
00492 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00493 {
00494 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00495 return "";
00496 }
00497
00498 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00499 {
00500 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00501 return true;
00502 }
00503
00504 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
00505 {
00506 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
00507 return true;
00508 }
00509
00510 void artdaq::CommandableFragmentGenerator::startDataThread()
00511 {
00512 if (dataThread_.joinable()) dataThread_.join();
00513 TLOG(TLVL_INFO) << "Starting Data Receiver Thread";
00514 try {
00515 dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
00516 }
00517 catch (const boost::exception& e)
00518 {
00519 TLOG(TLVL_ERROR) << "Caught boost::exception starting Data Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
00520 std::cerr << "Caught boost::exception starting Data Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
00521 exit(5);
00522 }
00523 }
00524
00525 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00526 {
00527 if (monitoringThread_.joinable()) monitoringThread_.join();
00528 TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread";
00529 try {
00530 monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00531 }
00532 catch (const boost::exception& e)
00533 {
00534 TLOG(TLVL_ERROR) << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
00535 std::cerr << "Caught boost::exception starting Hardware Monitoring thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
00536 exit(5);
00537 }
00538 }
00539
00540 std::string artdaq::CommandableFragmentGenerator::printMode_()
00541 {
00542 switch (mode_)
00543 {
00544 case RequestMode::Single:
00545 return "Single";
00546 case RequestMode::Buffer:
00547 return "Buffer";
00548 case RequestMode::Window:
00549 return "Window";
00550 case RequestMode::Ignored:
00551 return "Ignored";
00552 }
00553
00554 return "ERROR";
00555 }
00556
00557
00558
00559
00560
00561 void artdaq::CommandableFragmentGenerator::getDataLoop()
00562 {
00563 data_thread_running_ = true;
00564 while (!force_stop_)
00565 {
00566 if (!isHardwareOK_)
00567 {
00568 TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread";
00569 data_thread_running_ = false;
00570 return;
00571 }
00572
00573 TLOG(TLVL_GETDATALOOP) << "getDataLoop: calling getNext_";
00574
00575 bool data = false;
00576 auto startdata = std::chrono::steady_clock::now();
00577
00578 try
00579 {
00580 data = getNext_(newDataBuffer_);
00581 }
00582 catch (...)
00583 {
00584 ExceptionHandler(ExceptionHandlerRethrow::no,
00585 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00586 set_exception(true);
00587
00588 data_thread_running_ = false;
00589 return;
00590 }
00591 for (auto dataIter = newDataBuffer_.begin(); dataIter != newDataBuffer_.end(); ++dataIter)
00592 {
00593 TLOG(TLVL_GETDATALOOP_VERBOSE) << "getDataLoop: getNext_() returned fragment with timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00594 }
00595
00596 if (metricMan)
00597 {
00598 metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
00599 }
00600
00601 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00602 {
00603 usleep(sleep_on_no_data_us_);
00604 }
00605
00606 TLOG(TLVL_GETDATALOOP_DATABUFFWAIT) << "Waiting for data buffer ready";
00607 if (!waitForDataBufferReady()) return;
00608 TLOG(TLVL_GETDATALOOP_DATABUFFWAIT) << "Done waiting for data buffer ready";
00609
00610 TLOG(TLVL_GETDATALOOP) << "getDataLoop: processing data";
00611 if (data && !force_stop_)
00612 {
00613 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00614 switch (mode_)
00615 {
00616 case RequestMode::Single:
00617
00618 while (newDataBuffer_.size() >= fragment_ids_.size())
00619 {
00620 dataBuffer_.clear();
00621 auto it = newDataBuffer_.begin();
00622 std::advance(it, fragment_ids_.size());
00623 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00624 }
00625 break;
00626 case RequestMode::Buffer:
00627 case RequestMode::Ignored:
00628 case RequestMode::Window:
00629 default:
00630
00631 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00632 break;
00633 }
00634 getDataBufferStats();
00635 }
00636
00637 {
00638 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00639 if (dataBuffer_.size() > 0)
00640 {
00641 dataCondition_.notify_all();
00642 }
00643 }
00644 if (!data || force_stop_)
00645 {
00646 TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread";
00647 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00648 data_thread_running_ = false;
00649 if (requestReceiver_) requestReceiver_->ClearRequests();
00650 newDataBuffer_.clear();
00651 TLOG(TLVL_INFO) << "getDataLoop: Ending thread";
00652 return;
00653 }
00654 }
00655 }
00656
00657 bool artdaq::CommandableFragmentGenerator::waitForDataBufferReady()
00658 {
00659 auto startwait = std::chrono::steady_clock::now();
00660 auto first = true;
00661 auto lastwaittime = 0ULL;
00662
00663 {
00664 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00665 getDataBufferStats();
00666 }
00667
00668 while (dataBufferIsTooLarge())
00669 {
00670 if (!circularDataBufferMode_)
00671 {
00672 if (should_stop())
00673 {
00674 TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!";
00675 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00676 getDataBufferStats();
00677 dataCondition_.notify_all();
00678 data_thread_running_ = false;
00679 return false;
00680 }
00681 auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
00682
00683 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00684 {
00685 TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
00686 << "(seq_id=" << ev_counter()
00687 << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
00688 << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")";
00689 TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues";
00690 first = false;
00691 }
00692 if (waittime % 5 && waittime != lastwaittime)
00693 {
00694 TLOG(TLVL_WAITFORBUFFERREADY) << "getDataLoop: Data Retreival paused for " << waittime << " ms waiting for data buffer to drain";
00695 }
00696 lastwaittime = waittime;
00697 usleep(1000);
00698 }
00699 else
00700 {
00701 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00702 getDataBufferStats();
00703 if (dataBufferIsTooLarge())
00704 {
00705 if (dataBuffer_.begin() == dataBuffer_.end())
00706 {
00707 TLOG(TLVL_WARNING) << "Data buffer is reported as too large, but doesn't contain any Fragments! Possible corrupt memory!";
00708 continue;
00709 }
00710 if (*dataBuffer_.begin())
00711 {
00712 TLOG(TLVL_WAITFORBUFFERREADY) << "waitForDataBufferReady: Dropping Fragment with timestamp " << (*dataBuffer_.begin())->timestamp() << " from data buffer (Buffer over-size, circular data buffer mode)";
00713 }
00714 dataBuffer_.erase(dataBuffer_.begin());
00715 getDataBufferStats();
00716 }
00717
00718 }
00719 }
00720 return true;
00721 }
00722
00723 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00724 {
00725 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00726 }
00727
00728 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00729 {
00731 dataBufferDepthFragments_ = dataBuffer_.size();
00732 size_t acc = 0;
00733 TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: Calculating buffer size";
00734 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00735 {
00736 if (i->get() != nullptr)
00737 {
00738 acc += (*i)->sizeBytes();
00739 }
00740 }
00741 dataBufferDepthBytes_ = acc;
00742
00743 if (metricMan)
00744 {
00745 TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: Sending Metrics";
00746 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00747 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00748 }
00749 TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00750 << ", sz=" << dataBufferDepthBytes_.load() << "/" << maxDataBufferDepthBytes_;
00751 }
00752
00753 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00754 {
00755 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00756 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00757 if (dataBufferDepthFragments_ > 0)
00758 {
00759 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00760 {
00761
00762 while (dataBufferIsTooLarge())
00763 {
00764 TLOG(TLVL_CHECKDATABUFFER) << "checkDataBuffer: Dropping Fragment with timestamp " << (*dataBuffer_.begin())->timestamp() << " from data buffer (Buffer over-size)";
00765 dataBuffer_.erase(dataBuffer_.begin());
00766 getDataBufferStats();
00767 }
00768 if (dataBuffer_.size() > 0)
00769 {
00770 TLOG(TLVL_CHECKDATABUFFER) << "Determining if Fragments can be dropped from data buffer";
00771 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00772 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00773 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00774 {
00775 if ((*it)->timestamp() < min)
00776 {
00777 TLOG(TLVL_CHECKDATABUFFER) << "checkDataBuffer: Dropping Fragment with timestamp " << (*it)->timestamp() << " from data buffer (timeout=" << staleTimeout_ << ", min=" << min << ")";
00778 it = dataBuffer_.erase(it);
00779 }
00780 else
00781 {
00782 ++it;
00783 }
00784 }
00785 getDataBufferStats();
00786 }
00787 }
00788 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00789 {
00790
00791 while (dataBuffer_.size() > fragment_ids_.size())
00792 {
00793 dataBuffer_.erase(dataBuffer_.begin());
00794 }
00795 }
00796 }
00797 }
00798
00799 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00800 {
00801 while (!force_stop_)
00802 {
00803 if (should_stop() || monitoringInterval_ <= 0)
00804 {
00805 TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00806 << " and monitoringInterval is " << monitoringInterval_ << ", returning";
00807 return;
00808 }
00809 TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_";
00810
00811 auto now = std::chrono::steady_clock::now();
00812 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00813 {
00814 isHardwareOK_ = checkHWStatus_();
00815 TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
00816 lastMonitoringCall_ = now;
00817 }
00818 usleep(monitoringInterval_ / 10);
00819 }
00820 }
00821
00822 void artdaq::CommandableFragmentGenerator::applyRequestsIgnoredMode(artdaq::FragmentPtrs& frags)
00823 {
00824
00825 TLOG(TLVL_APPLYREQUESTS) << "Mode is Ignored; Copying data to output";
00826 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00827 dataBuffer_.clear();
00828 }
00829
00830 void artdaq::CommandableFragmentGenerator::applyRequestsSingleMode(artdaq::FragmentPtrs& frags)
00831 {
00832
00833 auto requests = requestReceiver_->GetRequests();
00834 while (requests.size() > 1)
00835 {
00836
00837 requestReceiver_->RemoveRequest(requests.begin()->first);
00838 requests.erase(requests.begin());
00839 }
00840 sendEmptyFragments(frags, requests);
00841
00842
00843 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00844
00845 if (dataBuffer_.size() > 0)
00846 {
00847 TLOG(TLVL_APPLYREQUESTS) << "Mode is Single; Sending copy of last event";
00848 for (auto& fragptr : dataBuffer_)
00849 {
00850
00851 auto frag = fragptr.get();
00852 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00853 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00854 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00855 newfrag->setTimestamp(requests[ev_counter()]);
00856 newfrag->setSequenceID(ev_counter());
00857 frags.push_back(std::move(newfrag));
00858 }
00859 }
00860 else
00861 {
00862 sendEmptyFragment(frags, ev_counter(), "No data for");
00863 }
00864 requestReceiver_->RemoveRequest(ev_counter());
00865 ev_counter_inc(1, true);
00866 }
00867
00868 void artdaq::CommandableFragmentGenerator::applyRequestsBufferMode(artdaq::FragmentPtrs& frags)
00869 {
00870
00871 auto requests = requestReceiver_->GetRequests();
00872 while (requests.size() > 1)
00873 {
00874
00875 requestReceiver_->RemoveRequest(requests.begin()->first);
00876 requests.erase(requests.begin());
00877 }
00878 sendEmptyFragments(frags, requests);
00879
00880
00881 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00882
00883 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments";
00884 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00885 frags.back()->setTimestamp(requests[ev_counter()]);
00886 ContainerFragmentLoader cfl(*frags.back());
00887 cfl.set_missing_data(false);
00888
00889
00890
00891 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00892 {
00893 TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Adding Fragment with timestamp " << (*it)->timestamp() << " to Container with sequence ID " << ev_counter();
00894 cfl.addFragment(*it);
00895 it = dataBuffer_.erase(it);
00896 }
00897 requestReceiver_->RemoveRequest(ev_counter());
00898 ev_counter_inc(1, true);
00899 }
00900
00901 void artdaq::CommandableFragmentGenerator::applyRequestsWindowMode(artdaq::FragmentPtrs& frags)
00902 {
00903 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode BEGIN";
00904
00905 auto requests = requestReceiver_->GetRequests();
00906
00907 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: Starting request processing";
00908 for (auto req = requests.begin(); req != requests.end();)
00909 {
00910 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: processing request with sequence ID " << req->first << ", timestamp " << req->second;
00911
00912
00913 while (req->first < ev_counter() && requests.size() > 0)
00914 {
00915 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
00916 requestReceiver_->RemoveRequest(req->first);
00917 req = requests.erase(req);
00918 }
00919 if (requests.size() == 0) break;
00920
00921 auto ts = req->second;
00922 TLOG(TLVL_APPLYREQUESTS) << "applyRequests: Checking that data exists for request window " << req->first;
00923 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00924 Fragment::timestamp_t max = min + windowWidth_;
00925 TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: min is " << min << ", max is " << max
00926 << " and last point in buffer is " << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0) << " (sz=" << dataBuffer_.size() << ")";
00927 bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
00928 bool windowTimeout = !windowClosed && TimeUtils::GetElapsedTimeMicroseconds(requestReceiver_->GetRequestTime(req->first)) > window_close_timeout_us_;
00929 if (windowTimeout)
00930 {
00931 TLOG(TLVL_WARNING) << "applyRequests: A timeout occurred waiting for data to close the request window ({" << min << "-" << max
00932 << "}, buffer={" << (dataBuffer_.size() > 0 ? dataBuffer_.front()->timestamp() : 0) << "-"
00933 << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)
00934 << "} ). Time waiting: "
00935 << TimeUtils::GetElapsedTimeMicroseconds(requestReceiver_->GetRequestTime(req->first)) << " us "
00936 << "(> " << window_close_timeout_us_ << " us).";
00937 }
00938 if (windowClosed || !data_thread_running_ || windowTimeout)
00939 {
00940 TLOG(TLVL_DEBUG) << "applyRequests: Creating ContainerFragment for Window-requested Fragments";
00941 frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
00942 frags.back()->setTimestamp(ts);
00943 ContainerFragmentLoader cfl(*frags.back());
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957 if (!windowClosed || (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min))
00958 {
00959 TLOG(TLVL_DEBUG) << "applyRequests: Request window starts before and/or ends after the current data buffer, setting ContainerFragment's missing_data flag!"
00960 << " (requestWindowRange=[" << min << "," << max << "], "
00961 << "buffer={" << (dataBuffer_.size() > 0 ? dataBuffer_.front()->timestamp() : 0) << "-"
00962 << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0) << "}";
00963 cfl.set_missing_data(true);
00964 }
00965
00966
00967
00968 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00969 {
00970 Fragment::timestamp_t fragT = (*it)->timestamp();
00971 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00972 {
00973 ++it;
00974 continue;
00975 }
00976
00977 TLOG(TLVL_APPLYREQUESTS) << "applyRequests: Adding Fragment with timestamp " << (*it)->timestamp() << " to Container";
00978 cfl.addFragment(*it);
00979
00980 if (uniqueWindows_)
00981 {
00982 it = dataBuffer_.erase(it);
00983 }
00984 else
00985 {
00986 ++it;
00987 }
00988 }
00989 requestReceiver_->RemoveRequest(req->first);
00990 checkOutOfOrderWindows(req->first);
00991 requestReceiver_->RemoveRequest(req->first);
00992 req = requests.erase(req);
00993 }
00994 else
00995 {
00996 ++req;
00997 }
00998 }
00999 }
01000
01001 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
01002 {
01003 if (check_stop() || exception())
01004 {
01005 return false;
01006 }
01007
01008
01009 if (mode_ == RequestMode::Ignored)
01010 {
01011 while (dataBufferDepthFragments_ <= 0)
01012 {
01013 if (check_stop() || exception() || !isHardwareOK_) return false;
01014 std::unique_lock<std::mutex> lock(dataBufferMutex_);
01015 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
01016 }
01017 }
01018 else
01019 {
01020 if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
01021 checkDataBuffer();
01022
01023
01024 auto counter = 0;
01025
01026 while (requestReceiver_->size() == 0 && counter < 100)
01027 {
01028 if (check_stop() || exception()) return false;
01029
01030 checkDataBuffer();
01031
01032 requestReceiver_->WaitForRequests(10);
01033 counter++;
01034 }
01035 }
01036
01037 {
01038 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
01039
01040 switch (mode_)
01041 {
01042 case RequestMode::Single:
01043 applyRequestsSingleMode(frags);
01044 break;
01045 case RequestMode::Window:
01046 applyRequestsWindowMode(frags);
01047 break;
01048 case RequestMode::Buffer:
01049 applyRequestsBufferMode(frags);
01050 break;
01051 case RequestMode::Ignored:
01052 default:
01053 applyRequestsIgnoredMode(frags);
01054 break;
01055 }
01056
01057 if (!data_thread_running_ || force_stop_)
01058 {
01059 TLOG(TLVL_INFO) << "Data thread has stopped; Clearing data buffer";
01060 dataBuffer_.clear();
01061 }
01062
01063 getDataBufferStats();
01064 }
01065
01066 if (frags.size() > 0)
01067 TLOG(TLVL_APPLYREQUESTS) << "Finished Processing Event " << (*frags.begin())->sequenceID() << " for fragment_id " << fragment_id() << ".";
01068 return true;
01069 }
01070
01071 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
01072 {
01073 TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment";
01074 for (auto fid : fragment_ids_)
01075 {
01076 auto frag = new Fragment();
01077 frag->setSequenceID(seqId);
01078 frag->setFragmentID(fid);
01079 frag->setSystemType(Fragment::EmptyFragmentType);
01080 frags.emplace_back(FragmentPtr(frag));
01081 }
01082 return true;
01083 }
01084
01085 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
01086 {
01087 if (requests.size() > 0)
01088 {
01089 TLOG(TLVL_SENDEMPTYFRAGMENTS) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first;
01090 while (requests.begin()->first > ev_counter())
01091 {
01092 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01093 ev_counter_inc(1, true);
01094 }
01095 }
01096 }
01097
01098 void artdaq::CommandableFragmentGenerator::checkOutOfOrderWindows(artdaq::Fragment::sequence_id_t seq)
01099 {
01100 windows_sent_ooo_[seq] = std::chrono::steady_clock::now();
01101
01102 auto it = windows_sent_ooo_.begin();
01103 while (it != windows_sent_ooo_.end())
01104 {
01105 if (seq == it->first && it->first == ev_counter())
01106 {
01107 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Sequence ID matches ev_counter, incrementing ev_counter (" << ev_counter() << ")";
01108 ev_counter_inc(1, true);
01109 it = windows_sent_ooo_.erase(it);
01110 }
01111 else if (it->first <= ev_counter())
01112 {
01113 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Data-taking has caught up to out-of-order window request " << it->first << ", removing from list. ev_counter=" << ev_counter();
01114 requestReceiver_->RemoveRequest(ev_counter());
01115 if (it->first == ev_counter()) ev_counter_inc(1, true);
01116 it = windows_sent_ooo_.erase(it);
01117 }
01118 else if (TimeUtils::GetElapsedTimeMicroseconds(it->second) > missing_request_window_timeout_us_)
01119 {
01120 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Out-of-order window " << it->first << " has timed out, setting current sequence ID and removing from list";
01121 while (ev_counter() <= it->first)
01122 {
01123 if (ev_counter() < it->first) TLOG(TLVL_WARNING) << "Missed request for sequence ID " << ev_counter() << "! Will not send any data for this sequence ID!";
01124 requestReceiver_->RemoveRequest(ev_counter());
01125 ev_counter_inc(1, true);
01126 }
01127 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), it);
01128 it = windows_sent_ooo_.erase(it);
01129 }
01130 else
01131 {
01132 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Out-of-order window " << it->first << " waiting. Current event counter = " << ev_counter();
01133 ++it;
01134 }
01135 }
01136 }
01137