00001 #define TRACE_NAME (app_name + "_CommandableFragmentGenerator").c_str() // include these 2 first -
00002 #include "artdaq/DAQdata/Globals.hh"
00003
00004 #include "artdaq/Application/CommandableFragmentGenerator.hh"
00005
00006 #include <boost/exception/all.hpp>
00007 #include <boost/throw_exception.hpp>
00008
00009 #include <limits>
00010 #include <iterator>
00011
00012 #include "canvas/Utilities/Exception.h"
00013 #include "cetlib_except/exception.h"
00014 #include "fhiclcpp/ParameterSet.h"
00015
00016 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00017 #include "artdaq-core/Data/Fragment.hh"
00018 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00019 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00020 #include "artdaq-core/Utilities/TimeUtils.hh"
00021
00022 #include <fstream>
00023 #include <iomanip>
00024 #include <iterator>
00025 #include <iostream>
00026 #include <iomanip>
00027 #include <algorithm>
00028 #include <sys/poll.h>
00029 #include "artdaq/DAQdata/TCPConnect.hh"
00030
00031 #define TLVL_GETNEXT 10
00032 #define TLVL_GETNEXT_VERBOSE 20
00033 #define TLVL_CHECKSTOP 11
00034 #define TLVL_EVCOUNTERINC 12
00035 #define TLVL_GETDATALOOP 13
00036 #define TLVL_GETDATALOOP_DATABUFFWAIT 21
00037 #define TLVL_GETDATALOOP_VERBOSE 20
00038 #define TLVL_WAITFORBUFFERREADY 15
00039 #define TLVL_GETBUFFERSTATS 16
00040 #define TLVL_CHECKDATABUFFER 17
00041 #define TLVL_GETMONITORINGDATA 18
00042 #define TLVL_APPLYREQUESTS 9
00043 #define TLVL_SENDEMPTYFRAGMENTS 19
00044 #define TLVL_CHECKWINDOWS 14
00045
00046 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator()
00047 : mutex_()
00048 , requestReceiver_(nullptr)
00049 , windowOffset_(0)
00050 , windowWidth_(0)
00051 , staleTimeout_(Fragment::InvalidTimestamp)
00052 , expectedType_(Fragment::EmptyFragmentType)
00053 , maxFragmentCount_(std::numeric_limits<size_t>::max())
00054 , uniqueWindows_(true)
00055 , windows_sent_ooo_()
00056 , missing_request_window_timeout_us_(1000000)
00057 , window_close_timeout_us_(2000000)
00058 , useDataThread_(false)
00059 , sleep_on_no_data_us_(0)
00060 , data_thread_running_(false)
00061 , dataBufferDepthFragments_(0)
00062 , dataBufferDepthBytes_(0)
00063 , maxDataBufferDepthFragments_(1000)
00064 , maxDataBufferDepthBytes_(1000)
00065 , useMonitoringThread_(false)
00066 , monitoringInterval_(0)
00067 , lastMonitoringCall_()
00068 , isHardwareOK_(true)
00069 , dataBuffer_()
00070 , newDataBuffer_()
00071 , run_number_(-1)
00072 , subrun_number_(-1)
00073 , timeout_(std::numeric_limits<uint64_t>::max())
00074 , timestamp_(std::numeric_limits<uint64_t>::max())
00075 , should_stop_(false)
00076 , exception_(false)
00077 , force_stop_(false)
00078 , latest_exception_report_("none")
00079 , ev_counter_(1)
00080 , board_id_(-1)
00081 , instance_name_for_metrics_("FragmentGenerator")
00082 , sleep_on_stop_us_(0)
00083 {}
00084
00085 artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(const fhicl::ParameterSet& ps)
00086 : mutex_()
00087 , requestReceiver_(nullptr)
00088 , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
00089 , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
00090 , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
00091 , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
00092 , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
00093 , windows_sent_ooo_()
00094 , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 5000000))
00095 , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
00096 , useDataThread_(ps.get<bool>("separate_data_thread", false))
00097 , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
00098 , data_thread_running_(false)
00099 , dataBufferDepthFragments_(0)
00100 , dataBufferDepthBytes_(0)
00101 , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
00102 , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
00103 , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
00104 , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
00105 , lastMonitoringCall_()
00106 , isHardwareOK_(true)
00107 , dataBuffer_()
00108 , newDataBuffer_()
00109 , run_number_(-1)
00110 , subrun_number_(-1)
00111 , timeout_(std::numeric_limits<uint64_t>::max())
00112 , timestamp_(std::numeric_limits<uint64_t>::max())
00113 , should_stop_(false)
00114 , exception_(false)
00115 , force_stop_(false)
00116 , latest_exception_report_("none")
00117 , ev_counter_(1)
00118 , board_id_(-1)
00119 , sleep_on_stop_us_(0)
00120 {
00121 board_id_ = ps.get<int>("board_id");
00122 instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
00123
00124 fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
00125
00126 TLOG(TLVL_TRACE) << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)";
00127 int fragment_id = ps.get<int>("fragment_id", -99);
00128
00129 if (fragment_id != -99)
00130 {
00131 if (fragment_ids_.size() != 0)
00132 {
00133 latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
00134 throw cet::exception(latest_exception_report_);
00135 }
00136 else
00137 {
00138 fragment_ids_.emplace_back(fragment_id);
00139 }
00140 }
00141
00142 sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
00143
00144 dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
00145 (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
00146
00147 std::string modeString = ps.get<std::string>("request_mode", "ignored");
00148 if (modeString == "single" || modeString == "Single")
00149 {
00150 mode_ = RequestMode::Single;
00151 }
00152 else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
00153 {
00154 mode_ = RequestMode::Buffer;
00155 }
00156 else if (modeString == "window" || modeString == "Window")
00157 {
00158 mode_ = RequestMode::Window;
00159 }
00160 else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
00161 {
00162 mode_ = RequestMode::Ignored;
00163 }
00164 TLOG(TLVL_DEBUG) << "Request mode is " << printMode_();
00165
00166 if (mode_ != RequestMode::Ignored)
00167 {
00168 if (!useDataThread_)
00169 {
00170 latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
00171 throw cet::exception(latest_exception_report_);
00172 }
00173 requestReceiver_.reset(new RequestReceiver(ps));
00174 }
00175 }
00176
00177 artdaq::CommandableFragmentGenerator::~CommandableFragmentGenerator()
00178 {
00179 joinThreads();
00180 }
00181
00182 void artdaq::CommandableFragmentGenerator::joinThreads()
00183 {
00184 should_stop_ = true;
00185 force_stop_ = true;
00186 TLOG(TLVL_DEBUG) << "Joining dataThread";
00187 if (dataThread_.joinable()) dataThread_.join();
00188 TLOG(TLVL_DEBUG) << "Joining monitoringThread";
00189 if (monitoringThread_.joinable()) monitoringThread_.join();
00190 requestReceiver_.reset(nullptr);
00191 }
00192
00193 bool artdaq::CommandableFragmentGenerator::getNext(FragmentPtrs& output)
00194 {
00195 bool result = true;
00196
00197 if (check_stop()) usleep(sleep_on_stop_us_);
00198 if (exception() || force_stop_) return false;
00199
00200 if (!useMonitoringThread_ && monitoringInterval_ > 0)
00201 {
00202 TLOG(TLVL_GETNEXT) << "getNext: Checking whether to collect Monitoring Data";
00203 auto now = std::chrono::steady_clock::now();
00204
00205 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00206 {
00207 TLOG(TLVL_GETNEXT) << "getNext: Collecting Monitoring Data";
00208 isHardwareOK_ = checkHWStatus_();
00209 TLOG(TLVL_GETNEXT) << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
00210 lastMonitoringCall_ = now;
00211 }
00212 }
00213
00214 try
00215 {
00216 std::lock_guard<std::mutex> lk(mutex_);
00217 if (useDataThread_)
00218 {
00219 TLOG(TLVL_TRACE) << "getNext: Calling applyRequests";
00220 result = applyRequests(output);
00221 TLOG(TLVL_TRACE) << "getNext: Done with applyRequests result=" << std::boolalpha << result;
00222
00223 if (exception())
00224 {
00225 TLOG(TLVL_ERROR) << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00226 throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
00227 }
00228 }
00229 else
00230 {
00231 if (!isHardwareOK_)
00232 {
00233 TLOG(TLVL_ERROR) << "Stopping CFG because the hardware reports bad status!";
00234 return false;
00235 }
00236 TLOG(TLVL_TRACE) << "getNext: Calling getNext_ " << ev_counter();
00237 try
00238 {
00239 result = getNext_(output);
00240 }
00241 catch (...)
00242 {
00243 throw;
00244 }
00245 TLOG(TLVL_TRACE) << "getNext: Done with getNext_ " << ev_counter();
00246 for (auto dataIter = output.begin(); dataIter != output.end(); ++dataIter)
00247 {
00248 TLOG(TLVL_GETNEXT_VERBOSE) << "getNext: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
00249 << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00250 }
00251 }
00252 }
00253 catch (const cet::exception& e)
00254 {
00255 latest_exception_report_ = "cet::exception caught in getNext(): ";
00256 latest_exception_report_.append(e.what());
00257 TLOG(TLVL_ERROR) << "getNext: cet::exception caught: " << e;
00258 set_exception(true);
00259 return false;
00260 }
00261 catch (const boost::exception& e)
00262 {
00263 latest_exception_report_ = "boost::exception caught in getNext(): ";
00264 latest_exception_report_.append(boost::diagnostic_information(e));
00265 TLOG(TLVL_ERROR) << "getNext: boost::exception caught: " << boost::diagnostic_information(e);
00266 set_exception(true);
00267 return false;
00268 }
00269 catch (const std::exception& e)
00270 {
00271 latest_exception_report_ = "std::exception caught in getNext(): ";
00272 latest_exception_report_.append(e.what());
00273 TLOG(TLVL_ERROR) << "getNext: std::exception caught: " << e.what();
00274 set_exception(true);
00275 return false;
00276 }
00277 catch (...)
00278 {
00279 latest_exception_report_ = "Unknown exception caught in getNext().";
00280 TLOG(TLVL_ERROR) << "getNext: unknown exception caught";
00281 set_exception(true);
00282 return false;
00283 }
00284
00285 if (!result)
00286 {
00287 TLOG(TLVL_DEBUG) << "stopped ";
00288 }
00289
00290 if (metricMan && !output.empty())
00291 {
00292 auto timestamp = output.front()->timestamp();
00293
00294 if (output.size() > 1)
00295 {
00296 for (auto& outputfrag : output)
00297 {
00298 if (outputfrag->timestamp() > timestamp)
00299 {
00300 timestamp = outputfrag->timestamp();
00301 }
00302 }
00303 }
00304
00305 metricMan->sendMetric("Last Timestamp", timestamp, "Ticks", 1,
00306 MetricMode::LastPoint, app_name);
00307 }
00308
00309 return result;
00310 }
00311
00312 bool artdaq::CommandableFragmentGenerator::check_stop()
00313 {
00314 TLOG(TLVL_CHECKSTOP) << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", exception status =" << int(exception());
00315
00316 if (!should_stop()) return false;
00317 if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
00318 if (force_stop_) return true;
00319
00320
00321 return !requestReceiver_->isRunning();
00322 }
00323
00324 int artdaq::CommandableFragmentGenerator::fragment_id() const
00325 {
00326 if (fragment_ids_.size() != 1)
00327 {
00328 throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
00329 }
00330 else
00331 {
00332 return fragment_ids_[0];
00333 }
00334 }
00335
00336 size_t artdaq::CommandableFragmentGenerator::ev_counter_inc(size_t step, bool force)
00337 {
00338 if (force || mode_ == RequestMode::Ignored)
00339 {
00340 TLOG(TLVL_EVCOUNTERINC) << "ev_counter_inc: Incrementing ev_counter from " << ev_counter() << " by " << step;
00341 return ev_counter_.fetch_add(step);
00342 }
00343 return ev_counter_.load();
00344 }
00345
00346 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
00347 {
00348 TLOG(TLVL_TRACE) << "Start Command received.";
00349 if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
00350
00351 timeout_ = timeout;
00352 timestamp_ = timestamp;
00353 ev_counter_.store(1);
00354 should_stop_.store(false);
00355 exception_.store(false);
00356 run_number_ = run;
00357 subrun_number_ = 1;
00358 latest_exception_report_ = "none";
00359 dataBuffer_.clear();
00360 windows_sent_ooo_.clear();
00361
00362 start();
00363
00364 std::unique_lock<std::mutex> lk(mutex_);
00365 if (useDataThread_) startDataThread();
00366 if (useMonitoringThread_) startMonitoringThread();
00367 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00368 TLOG(TLVL_TRACE) << "Start Command complete.";
00369 }
00370
00371 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
00372 {
00373 TLOG(TLVL_TRACE) << "Stop Command received.";
00374
00375 timeout_ = timeout;
00376 timestamp_ = timestamp;
00377 if (requestReceiver_ && requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
00378
00379 stopNoMutex();
00380 should_stop_.store(true);
00381 std::unique_lock<std::mutex> lk(mutex_);
00382 stop();
00383 TLOG(TLVL_TRACE) << "Stop command complete.";
00384 }
00385
00386 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
00387 {
00388 TLOG(TLVL_TRACE) << "Pause Command received.";
00389 timeout_ = timeout;
00390 timestamp_ = timestamp;
00391 if (requestReceiver_->isRunning()) requestReceiver_->stopRequestReceiverThread();
00392
00393 pauseNoMutex();
00394 should_stop_.store(true);
00395 std::unique_lock<std::mutex> lk(mutex_);
00396
00397 pause();
00398 TLOG(TLVL_TRACE) << "Pause Command complete.";
00399 }
00400
00401 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
00402 {
00403 TLOG(TLVL_TRACE) << "Resume Command received.";
00404 timeout_ = timeout;
00405 timestamp_ = timestamp;
00406
00407 subrun_number_ += 1;
00408 should_stop_ = false;
00409
00410 dataBuffer_.clear();
00411
00412
00413 resume();
00414
00415 std::unique_lock<std::mutex> lk(mutex_);
00416 if (useDataThread_) startDataThread();
00417 if (useMonitoringThread_) startMonitoringThread();
00418 if (mode_ != RequestMode::Ignored && !requestReceiver_->isRunning()) requestReceiver_->startRequestReceiverThread();
00419 TLOG(TLVL_TRACE) << "Resume Command complete.";
00420 }
00421
00422 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
00423 {
00424 TLOG(TLVL_TRACE) << "Report Command received.";
00425 std::lock_guard<std::mutex> lk(mutex_);
00426
00427
00428
00429
00430
00431
00432 std::string childReport = reportSpecific(which);
00433 if (childReport.length() > 0) { return childReport; }
00434
00435
00436 if (which == "latest_exception")
00437 {
00438 return latest_exception_report_;
00439 }
00440
00441
00442 childReport = report();
00443 if (childReport.length() > 0) { return childReport; }
00444
00445
00446 std::string tmpString = "The \"" + which + "\" command is not ";
00447 tmpString.append("currently supported by the ");
00448 tmpString.append(metricsReportingInstanceName());
00449 tmpString.append(" fragment generator.");
00450 TLOG(TLVL_TRACE) << "Report Command complete.";
00451 return tmpString;
00452 }
00453
00454
00455 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
00456 {
00457 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
00458 }
00459
00460 void artdaq::CommandableFragmentGenerator::pause()
00461 {
00462 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
00463 }
00464
00465 void artdaq::CommandableFragmentGenerator::resume()
00466 {
00467 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
00468 }
00469
00470 std::string artdaq::CommandableFragmentGenerator::report()
00471 {
00472 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
00473 return "";
00474 }
00475
00476 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
00477 {
00478 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
00479 return "";
00480 }
00481
00482 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
00483 {
00484 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
00485 return true;
00486 }
00487
00488 bool artdaq::CommandableFragmentGenerator::metaCommand(std::string const&, std::string const&)
00489 {
00490 #pragma message "Using default implementation of CommandableFragmentGenerator::metaCommand(std::string, std::string)"
00491 return true;
00492 }
00493
00494 void artdaq::CommandableFragmentGenerator::startDataThread()
00495 {
00496 if (dataThread_.joinable()) dataThread_.join();
00497 TLOG(TLVL_INFO) << "Starting Data Receiver Thread";
00498 dataThread_ = boost::thread(&CommandableFragmentGenerator::getDataLoop, this);
00499 }
00500
00501 void artdaq::CommandableFragmentGenerator::startMonitoringThread()
00502 {
00503 if (monitoringThread_.joinable()) monitoringThread_.join();
00504 TLOG(TLVL_INFO) << "Starting Hardware Monitoring Thread";
00505 monitoringThread_ = boost::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
00506 }
00507
00508 std::string artdaq::CommandableFragmentGenerator::printMode_()
00509 {
00510 switch (mode_)
00511 {
00512 case RequestMode::Single:
00513 return "Single";
00514 case RequestMode::Buffer:
00515 return "Buffer";
00516 case RequestMode::Window:
00517 return "Window";
00518 case RequestMode::Ignored:
00519 return "Ignored";
00520 }
00521
00522 return "ERROR";
00523 }
00524
00525 void artdaq::CommandableFragmentGenerator::getDataLoop()
00526 {
00527 data_thread_running_ = true;
00528 while (!force_stop_)
00529 {
00530 if (!isHardwareOK_)
00531 {
00532 TLOG(TLVL_DEBUG) << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread";
00533 data_thread_running_ = false;
00534 return;
00535 }
00536
00537 TLOG(TLVL_GETDATALOOP) << "getDataLoop: calling getNext_";
00538
00539 bool data = false;
00540 auto startdata = std::chrono::steady_clock::now();
00541
00542 try
00543 {
00544 data = getNext_(newDataBuffer_);
00545 }
00546 catch (...)
00547 {
00548 ExceptionHandler(ExceptionHandlerRethrow::no,
00549 "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
00550 set_exception(true);
00551
00552 data_thread_running_ = false;
00553 return;
00554 }
00555 for (auto dataIter = newDataBuffer_.begin(); dataIter != newDataBuffer_.end(); ++dataIter)
00556 {
00557 TLOG(TLVL_GETDATALOOP_VERBOSE) << "getDataLoop: getNext_() returned fragment with sequenceID = " << (*dataIter)->sequenceID()
00558 << ", timestamp = " << (*dataIter)->timestamp() << ", and sizeBytes = " << (*dataIter)->sizeBytes();
00559 }
00560
00561 if (metricMan)
00562 {
00563 metricMan->sendMetric("Avg Data Acquisition Time", TimeUtils::GetElapsedTime(startdata), "s", 3, artdaq::MetricMode::Average);
00564 }
00565
00566 if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
00567 {
00568 usleep(sleep_on_no_data_us_);
00569 }
00570
00571 TLOG(TLVL_GETDATALOOP_DATABUFFWAIT) << "Waiting for data buffer ready";
00572 if (!waitForDataBufferReady()) return;
00573 TLOG(TLVL_GETDATALOOP_DATABUFFWAIT) << "Done waiting for data buffer ready";
00574
00575 TLOG(TLVL_GETDATALOOP) << "getDataLoop: processing data";
00576 if (data && !force_stop_)
00577 {
00578 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00579 switch (mode_)
00580 {
00581 case RequestMode::Single:
00582
00583 while (newDataBuffer_.size() >= fragment_ids_.size())
00584 {
00585 dataBuffer_.clear();
00586 auto it = newDataBuffer_.begin();
00587 std::advance(it, fragment_ids_.size());
00588 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
00589 }
00590 break;
00591 case RequestMode::Buffer:
00592 case RequestMode::Ignored:
00593 case RequestMode::Window:
00594 default:
00595
00596 dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
00597 break;
00598 }
00599 getDataBufferStats();
00600 }
00601
00602 {
00603 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00604 if (dataBuffer_.size() > 0)
00605 {
00606 dataCondition_.notify_all();
00607 }
00608 }
00609 if (!data || force_stop_)
00610 {
00611 TLOG(TLVL_INFO) << "Data flow has stopped. Ending data collection thread";
00612 data_thread_running_ = false;
00613 if (requestReceiver_) requestReceiver_->ClearRequests();
00614 dataBuffer_.clear();
00615 newDataBuffer_.clear();
00616 return;
00617 }
00618 }
00619 }
00620
00621 bool artdaq::CommandableFragmentGenerator::waitForDataBufferReady()
00622 {
00623 auto startwait = std::chrono::steady_clock::now();
00624 auto first = true;
00625 auto lastwaittime = 0ULL;
00626 while (dataBufferIsTooLarge())
00627 {
00628 if (should_stop())
00629 {
00630 TLOG(TLVL_DEBUG) << "Run ended while waiting for buffer to shrink!";
00631 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00632 getDataBufferStats();
00633 dataCondition_.notify_all();
00634 data_thread_running_ = false;
00635 return false;
00636 }
00637 auto waittime = TimeUtils::GetElapsedTimeMilliseconds(startwait);
00638
00639 if (first || (waittime != lastwaittime && waittime % 1000 == 0))
00640 {
00641 TLOG(TLVL_WARNING) << "Bad Omen: Data Buffer has exceeded its size limits. "
00642 << "(seq_id=" << ev_counter()
00643 << ", frags=" << dataBufferDepthFragments_ << "/" << maxDataBufferDepthFragments_
00644 << ", szB=" << dataBufferDepthBytes_ << "/" << maxDataBufferDepthBytes_ << ")";
00645 TLOG(TLVL_TRACE) << "Bad Omen: Possible causes include requests not getting through or Ignored-mode BR issues";
00646 first = false;
00647 }
00648 if (waittime % 5 && waittime != lastwaittime)
00649 {
00650 TLOG(TLVL_WAITFORBUFFERREADY) << "getDataLoop: Data Retreival paused for " << waittime << " ms waiting for data buffer to drain";
00651 }
00652 lastwaittime = waittime;
00653 usleep(1000);
00654 }
00655 return true;
00656 }
00657
00658 bool artdaq::CommandableFragmentGenerator::dataBufferIsTooLarge()
00659 {
00660 return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
00661 }
00662
00663 void artdaq::CommandableFragmentGenerator::getDataBufferStats()
00664 {
00666 dataBufferDepthFragments_ = dataBuffer_.size();
00667 size_t acc = 0;
00668 TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: Calculating buffer size";
00669 for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
00670 {
00671 if (i->get() != nullptr)
00672 {
00673 acc += (*i)->sizeBytes();
00674 }
00675 }
00676 dataBufferDepthBytes_ = acc;
00677
00678 if (metricMan)
00679 {
00680 TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: Sending Metrics";
00681 metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
00682 metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
00683 }
00684 TLOG(TLVL_GETBUFFERSTATS) << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
00685 << ", sz=" << dataBufferDepthBytes_.load() << "/" << maxDataBufferDepthBytes_;
00686 }
00687
00688 void artdaq::CommandableFragmentGenerator::checkDataBuffer()
00689 {
00690 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00691 dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
00692 if (dataBufferDepthFragments_ > 0)
00693 {
00694 if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
00695 {
00696
00697 while (dataBufferIsTooLarge())
00698 {
00699 dataBuffer_.erase(dataBuffer_.begin());
00700 getDataBufferStats();
00701 }
00702 if (dataBuffer_.size() > 0)
00703 {
00704 TLOG(TLVL_CHECKDATABUFFER) << "Determining if Fragments can be dropped from data buffer";
00705 Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
00706 Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
00707 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00708 {
00709 if ((*it)->timestamp() < min)
00710 {
00711 it = dataBuffer_.erase(it);
00712 }
00713 else
00714 {
00715 ++it;
00716 }
00717 }
00718 getDataBufferStats();
00719 }
00720 }
00721 else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
00722 {
00723
00724 while (dataBuffer_.size() > fragment_ids_.size())
00725 {
00726 dataBuffer_.erase(dataBuffer_.begin());
00727 }
00728 }
00729 }
00730 }
00731
00732 void artdaq::CommandableFragmentGenerator::getMonitoringDataLoop()
00733 {
00734 while (!force_stop_)
00735 {
00736 if (should_stop() || monitoringInterval_ <= 0)
00737 {
00738 TLOG(TLVL_DEBUG) << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
00739 << " and monitoringInterval is " << monitoringInterval_ << ", returning";
00740 return;
00741 }
00742 TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: Determining whether to call checkHWStatus_";
00743
00744 auto now = std::chrono::steady_clock::now();
00745 if (TimeUtils::GetElapsedTimeMicroseconds(lastMonitoringCall_, now) >= static_cast<size_t>(monitoringInterval_))
00746 {
00747 isHardwareOK_ = checkHWStatus_();
00748 TLOG(TLVL_GETMONITORINGDATA) << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_;
00749 lastMonitoringCall_ = now;
00750 }
00751 usleep(monitoringInterval_ / 10);
00752 }
00753 }
00754
00755 void artdaq::CommandableFragmentGenerator::applyRequestsIgnoredMode(artdaq::FragmentPtrs& frags)
00756 {
00757
00758 TLOG(TLVL_APPLYREQUESTS) << "Mode is Ignored; Copying data to output";
00759 std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
00760 dataBuffer_.clear();
00761 }
00762
00763 void artdaq::CommandableFragmentGenerator::applyRequestsSingleMode(artdaq::FragmentPtrs& frags)
00764 {
00765
00766 auto requests = requestReceiver_->GetRequests();
00767 while (requests.size() > 1)
00768 {
00769
00770 requestReceiver_->RemoveRequest(requests.begin()->first);
00771 requests.erase(requests.begin());
00772 }
00773 sendEmptyFragments(frags, requests);
00774
00775
00776 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00777
00778 if (dataBuffer_.size() > 0)
00779 {
00780 TLOG(TLVL_APPLYREQUESTS) << "Mode is Single; Sending copy of last event";
00781 for (auto& fragptr : dataBuffer_)
00782 {
00783
00784 auto frag = fragptr.get();
00785 auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
00786 newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
00787 memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
00788 newfrag->setTimestamp(requests[ev_counter()]);
00789 newfrag->setSequenceID(ev_counter());
00790 frags.push_back(std::move(newfrag));
00791 }
00792 }
00793 else
00794 {
00795 sendEmptyFragment(frags, ev_counter(), "No data for");
00796 }
00797 requestReceiver_->RemoveRequest(ev_counter());
00798 ev_counter_inc(1, true);
00799 }
00800
00801 void artdaq::CommandableFragmentGenerator::applyRequestsBufferMode(artdaq::FragmentPtrs& frags)
00802 {
00803
00804 auto requests = requestReceiver_->GetRequests();
00805 while (requests.size() > 1)
00806 {
00807
00808 requestReceiver_->RemoveRequest(requests.begin()->first);
00809 requests.erase(requests.begin());
00810 }
00811 sendEmptyFragments(frags, requests);
00812
00813
00814 if (requests.size() == 0 || !requests.count(ev_counter())) return;
00815
00816 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered Fragments";
00817 frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
00818 frags.back()->setTimestamp(requests[ev_counter()]);
00819 ContainerFragmentLoader cfl(*frags.back());
00820 cfl.set_missing_data(false);
00821
00822
00823
00824 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00825 {
00826 TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Adding Fragment with timestamp " << (*it)->timestamp() << " to Container";
00827 cfl.addFragment(*it);
00828 it = dataBuffer_.erase(it);
00829 }
00830 requestReceiver_->RemoveRequest(ev_counter());
00831 ev_counter_inc(1, true);
00832 }
00833
00834 void artdaq::CommandableFragmentGenerator::applyRequestsWindowMode(artdaq::FragmentPtrs& frags)
00835 {
00836 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode BEGIN";
00837
00838 auto requests = requestReceiver_->GetRequests();
00839
00840 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: Starting request processing";
00841 for (auto req = requests.begin(); req != requests.end();)
00842 {
00843 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: processing request with sequence ID " << req->first << ", timestamp " << req->second;
00844
00845
00846 while (req->first < ev_counter() && requests.size() > 0)
00847 {
00848 TLOG(TLVL_APPLYREQUESTS) << "applyRequestsWindowMode: Clearing passed request for sequence ID " << req->first;
00849 requestReceiver_->RemoveRequest(req->first);
00850 req = requests.erase(req);
00851 }
00852 if (requests.size() == 0) break;
00853
00854 auto ts = req->second;
00855 TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Checking that data exists for request window " << req->first;
00856 Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
00857 Fragment::timestamp_t max = min + windowWidth_;
00858 TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: min is " << min << ", max is " << max
00859 << " and last point in buffer is " << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0) << " (sz=" << dataBuffer_.size() << ")";
00860 bool windowClosed = dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max;
00861 bool windowTimeout = !windowClosed && TimeUtils::GetElapsedTimeMicroseconds(requestReceiver_->GetRequestTime(req->first)) > window_close_timeout_us_;
00862 if (windowTimeout)
00863 {
00864 TLOG(TLVL_WARNING) << "A timeout occurred waiting for data to close the request window ({" << min << "-" << max
00865 << "}, buffer={" << (dataBuffer_.size() > 0 ? dataBuffer_.front()->timestamp() : 0) << "-"
00866 << (dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)
00867 << "} ). Time waiting: "
00868 << TimeUtils::GetElapsedTimeMicroseconds(requestReceiver_->GetRequestTime(req->first)) << " us "
00869 << "(> " << window_close_timeout_us_ << " us).";
00870 }
00871 if (windowClosed || !data_thread_running_ || windowTimeout)
00872 {
00873 TLOG(TLVL_DEBUG) << "Creating ContainerFragment for Buffered or Window-requested Fragments";
00874 frags.emplace_back(new artdaq::Fragment(req->first, fragment_id()));
00875 frags.back()->setTimestamp(ts);
00876 ContainerFragmentLoader cfl(*frags.back());
00877
00878 if (!windowClosed) cfl.set_missing_data(true);
00879 if (dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
00880 {
00881 TLOG(TLVL_DEBUG) << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer";
00882 cfl.set_missing_data(true);
00883 }
00884
00885
00886
00887 for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
00888 {
00889 Fragment::timestamp_t fragT = (*it)->timestamp();
00890 if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
00891 {
00892 ++it;
00893 continue;
00894 }
00895
00896 TLOG(TLVL_APPLYREQUESTS) << "ApplyRequests: Adding Fragment with timestamp " << (*it)->timestamp() << " to Container";
00897 cfl.addFragment(*it);
00898
00899 if (uniqueWindows_)
00900 {
00901 it = dataBuffer_.erase(it);
00902 }
00903 else
00904 {
00905 ++it;
00906 }
00907 }
00908 requestReceiver_->RemoveRequest(req->first);
00909 checkOutOfOrderWindows(req->first);
00910 requestReceiver_->RemoveRequest(req->first);
00911 req = requests.erase(req);
00912 }
00913 else
00914 {
00915 ++req;
00916 }
00917 }
00918 }
00919
00920 bool artdaq::CommandableFragmentGenerator::applyRequests(artdaq::FragmentPtrs& frags)
00921 {
00922 if (check_stop() || exception())
00923 {
00924 return false;
00925 }
00926
00927
00928 if (mode_ == RequestMode::Ignored)
00929 {
00930 while (dataBufferDepthFragments_ <= 0)
00931 {
00932 if (check_stop() || exception() || !isHardwareOK_) return false;
00933 std::unique_lock<std::mutex> lock(dataBufferMutex_);
00934 dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
00935 }
00936 }
00937 else
00938 {
00939 if ((check_stop() && requestReceiver_->size() == 0) || exception()) return false;
00940 checkDataBuffer();
00941
00942
00943 auto counter = 0;
00944
00945 while (requestReceiver_->size() == 0 && counter < 100)
00946 {
00947 if (check_stop() || exception()) return false;
00948
00949 checkDataBuffer();
00950
00951 requestReceiver_->WaitForRequests(10);
00952 counter++;
00953 }
00954 }
00955
00956 {
00957 std::unique_lock<std::mutex> dlk(dataBufferMutex_);
00958
00959 switch (mode_)
00960 {
00961 case RequestMode::Single:
00962 applyRequestsSingleMode(frags);
00963 break;
00964 case RequestMode::Window:
00965 applyRequestsWindowMode(frags);
00966 break;
00967 case RequestMode::Buffer:
00968 applyRequestsBufferMode(frags);
00969 break;
00970 case RequestMode::Ignored:
00971 default:
00972 applyRequestsIgnoredMode(frags);
00973 break;
00974 }
00975
00976 getDataBufferStats();
00977 }
00978
00979 if (frags.size() > 0)
00980 TLOG(TLVL_APPLYREQUESTS) << "Finished Processing Event " << (*frags.begin())->sequenceID() << " for fragment_id " << fragment_id() << ".";
00981 return true;
00982 }
00983
00984 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
00985 {
00986 TLOG(TLVL_WARNING) << desc << " sequence ID " << seqId << ", sending empty fragment";
00987 for (auto fid : fragment_ids_)
00988 {
00989 auto frag = new Fragment();
00990 frag->setSequenceID(seqId);
00991 frag->setFragmentID(fid);
00992 frag->setSystemType(Fragment::EmptyFragmentType);
00993 frags.emplace_back(FragmentPtr(frag));
00994 }
00995 return true;
00996 }
00997
00998 void artdaq::CommandableFragmentGenerator::sendEmptyFragments(artdaq::FragmentPtrs& frags, std::map<Fragment::sequence_id_t, Fragment::timestamp_t>& requests)
00999 {
01000 if (requests.size() > 0)
01001 {
01002 TLOG(TLVL_SENDEMPTYFRAGMENTS) << "Sending Empty Fragments for Sequence IDs from " << ev_counter() << " up to but not including " << requests.begin()->first;
01003 while (requests.begin()->first > ev_counter())
01004 {
01005 sendEmptyFragment(frags, ev_counter(), "Missed request for");
01006 ev_counter_inc(1, true);
01007 }
01008 }
01009 }
01010
01011 void artdaq::CommandableFragmentGenerator::checkOutOfOrderWindows(artdaq::Fragment::sequence_id_t seq)
01012 {
01013 windows_sent_ooo_[seq] = std::chrono::steady_clock::now();
01014
01015 auto it = windows_sent_ooo_.begin();
01016 while (it != windows_sent_ooo_.end())
01017 {
01018 if (seq == it->first && it->first == ev_counter())
01019 {
01020 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Sequence ID matches ev_counter, incrementing ev_counter (" << ev_counter() << ")";
01021 ev_counter_inc(1, true);
01022 it = windows_sent_ooo_.erase(it);
01023 }
01024 else if (it->first <= ev_counter())
01025 {
01026 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Data-taking has caught up to out-of-order window request " << it->first << ", removing from list. ev_counter=" << ev_counter();
01027 requestReceiver_->RemoveRequest(ev_counter());
01028 if (it->first == ev_counter()) ev_counter_inc(1, true);
01029 it = windows_sent_ooo_.erase(it);
01030 }
01031 else if (TimeUtils::GetElapsedTimeMicroseconds(it->second) > missing_request_window_timeout_us_)
01032 {
01033 TLOG(TLVL_CHECKWINDOWS) << "checkOutOfOrderWindows: Out-of-order window " << it->first << " has timed out, setting current sequence ID and removing from list";
01034 while (ev_counter() <= it->first)
01035 {
01036 if (ev_counter() < it->first) TLOG(TLVL_WARNING) << "Missed request for sequence ID " << ev_counter() << "! Will not send any data for this sequence ID!";
01037 requestReceiver_->RemoveRequest(ev_counter());
01038 ev_counter_inc(1, true);
01039 }
01040 windows_sent_ooo_.erase(windows_sent_ooo_.begin(), it);
01041 it = windows_sent_ooo_.erase(it);
01042 }
01043 else
01044 {
01045 ++it;
01046 }
01047 }
01048 }