00001 #include <chrono>
00002
00003 #include "artdaq/DAQrate/DataReceiverManager.hh"
00004 #include "artdaq/DAQdata/Globals.hh"
00005 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
00006
00007 artdaq::DataReceiverManager::DataReceiverManager(const fhicl::ParameterSet& pset)
00008 : stop_requested_(false)
00009 , source_threads_()
00010 , source_plugins_()
00011 , enabled_sources_()
00012 , suppressed_sources_()
00013 , fragment_store_()
00014 , recv_frag_count_()
00015 , recv_frag_size_()
00016 , recv_seq_count_()
00017 , suppress_noisy_senders_(pset.get<bool>("auto_suppression_enabled",true))
00018 , suppression_threshold_(pset.get<size_t>("max_receive_difference", 50))
00019 , receive_timeout_(pset.get<size_t>("receive_timeout_usec", 100000))
00020 {
00021 TLOG_DEBUG("DataReceiverManager") << "Constructor" << TLOG_ENDL;
00022 auto enabled_srcs = pset.get<std::vector<int>>("enabled_sources", std::vector<int>());
00023 auto enabled_srcs_empty = enabled_srcs.size() == 0;
00024 if (enabled_srcs_empty)
00025 {
00026 TLOG_INFO("DataReceiverManager") << "enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
00027 }
00028 else
00029 {
00030 for (auto& s : enabled_srcs)
00031 {
00032 enabled_sources_.insert(s);
00033 }
00034 }
00035
00036 auto srcs = pset.get<fhicl::ParameterSet>("sources", fhicl::ParameterSet());
00037 for (auto& s : srcs.get_pset_names())
00038 {
00039 try
00040 {
00041 auto ss = srcs.get<fhicl::ParameterSet>(s).get<int>("source_rank");
00042 if (enabled_srcs_empty) enabled_sources_.insert(ss);
00043 source_plugins_[ss] = std::unique_ptr<TransferInterface>(MakeTransferPlugin(srcs, s, TransferInterface::Role::kReceive));
00044 fragment_store_[ss];
00045 }
00046 catch (cet::exception ex)
00047 {
00048 TLOG_WARNING("DataReceiverManager") << "cet::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
00049 }
00050 catch (std::exception ex)
00051 {
00052 TLOG_WARNING("DataReceiverManager") << "std::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
00053 }
00054 catch (...)
00055 {
00056 TLOG_WARNING("DataReceiverManager") << "Non-cet exception caught while setting up source " << s << "." << TLOG_ENDL;
00057 }
00058 }
00059 if (srcs.get_pset_names().size() == 0)
00060 {
00061 TLOG_ERROR("DataReceiverManager") << "No sources configured!" << TLOG_ENDL;
00062 }
00063 }
00064
00065 artdaq::DataReceiverManager::~DataReceiverManager()
00066 {
00067 TLOG_DEBUG("DataReceiverManager") << "Destructor" << TLOG_ENDL;
00068 TRACE(5, "~DataReceiverManager: BEGIN: Setting stop_requested to true, frags=%zu, bytes=%zu", count(), byteCount());
00069 stop_requested_ = true;
00070
00071 TRACE(5, "~DataReceiverManager: Notifying all threads");
00072 output_cv_.notify_all();
00073
00074 TRACE(5, "~DataReceiverManager: Joining all threads");
00075 for (auto& s : source_threads_)
00076 {
00077 auto& thread = s.second;
00078 if (thread.joinable()) thread.join();
00079 }
00080 TRACE(5, "~DataReceiverManager: DONE");
00081 }
00082
00083 bool artdaq::DataReceiverManager::fragments_ready_() const
00084 {
00085 return get_next_source_() != -1;
00086 }
00087
00088 int artdaq::DataReceiverManager::get_next_source_() const
00089 {
00090
00091 for (auto& it : fragment_store_)
00092 {
00093 if (!enabled_sources_.count(it.first) || suppressed_sources_.count(it.first)) continue;
00094 if (!it.second.empty()) return it.first;
00095 }
00096 return -1;
00097 }
00098
00099 void artdaq::DataReceiverManager::unsuppressAll()
00100 {
00101 suppressed_sources_.clear();
00102 output_cv_.notify_all();
00103 }
00104
00105 void artdaq::DataReceiverManager::suppress_source(int source)
00106 {
00107 suppressed_sources_.insert(source);
00108 }
00109
00110 void artdaq::DataReceiverManager::reject_fragment(int source_rank, FragmentPtr frag)
00111 {
00112 if (frag == nullptr) return;
00113 suppress_source(source_rank);
00114 fragment_store_[source_rank].emplace_front(std::move(frag));
00115 }
00116
00117 void artdaq::DataReceiverManager::start_threads()
00118 {
00119 for (auto& source : source_plugins_)
00120 {
00121 auto& rank = source.first;
00122 if (enabled_sources_.count(rank))
00123 {
00124 source_threads_[rank] = std::thread(&DataReceiverManager::runReceiver_, this, rank);
00125 }
00126 }
00127 }
00128
00129 artdaq::FragmentPtr artdaq::DataReceiverManager::recvFragment(int& rank, size_t timeout_usec)
00130 {
00131 TRACE(5, "DataReceiverManager::recvFragment entered tmo=%zu us", timeout_usec);
00132
00133 if (timeout_usec == 0) timeout_usec = 1000000;
00134
00135 auto ready = fragments_ready_();
00136 size_t waited = 0;
00137 auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
00138 TRACE(5, "DataReceiverManager::recvFragment fragment_ready_=%d before wait", ready);
00139 while (!ready && waited < timeout_usec)
00140 {
00141 {
00142 std::unique_lock<std::mutex> lck(input_cv_mutex_);
00143 input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
00144 }
00145 waited += wait_amount;
00146 ready = fragments_ready_();
00147 }
00148 TRACE(5, "DataReceiverManager::recvFragment fragment_ready_=%d after waited=%zu", ready, waited);
00149 if (!ready)
00150 {
00151 TRACE(5, "DataReceiverManager::recvFragment: No fragments ready, returning empty");
00152 rank = TransferInterface::RECV_TIMEOUT;
00153 return std::unique_ptr<Fragment>{};
00154 }
00155
00156 int current_source = get_next_source_();
00157 FragmentPtr current_fragment = fragment_store_[current_source].front();
00158 output_cv_.notify_all();
00159 rank = current_source;
00160
00161 if (current_fragment != nullptr)
00162 TRACE(5, "DataReceiverManager::recvFragment: Done rank=%d, fragment size=%zu words, seqId=%zu", rank, current_fragment->size(), current_fragment->sequenceID());
00163 return std::move(current_fragment);
00164 }
00165
00166 void artdaq::DataReceiverManager::runReceiver_(int source_rank)
00167 {
00168 while (!stop_requested_ && enabled_sources_.count(source_rank))
00169 {
00170 TRACE(16, "DataReceiverManager::runReceiver_: Begin loop");
00171 auto is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
00172 while (!stop_requested_ && is_suppressed)
00173 {
00174 TRACE(6, "DataReceiverManager::runReceiver_: Suppressing receiver rank %d", source_rank);
00175 if (!is_suppressed) input_cv_.notify_all();
00176 else
00177 {
00178 std::unique_lock<std::mutex> lck(output_cv_mutex_);
00179 output_cv_.wait_for(lck, std::chrono::seconds(1));
00180 }
00181 is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
00182 }
00183 if (stop_requested_) return;
00184
00185 auto start_time = std::chrono::steady_clock::now();
00186 TRACE(16, "DataReceiverManager::runReceiver_: Calling receiveFragment");
00187 auto fragment = std::unique_ptr<Fragment>(new Fragment());
00188 auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
00189 TRACE(16, "DataReceiverManager::runReceiver_: Done with receiveFragment, ret=%d (should be %d)", ret, source_rank);
00190
00191 if (ret != source_rank) continue;
00192
00193 recv_frag_count_.incSlot(source_rank);
00194 recv_frag_size_.incSlot(source_rank, fragment->size() * sizeof(RawDataType));
00195 recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
00196
00197 bool endOfData = fragment->type() == artdaq::Fragment::EndOfDataFragmentType;
00198
00199 if (metricMan)
00200 {
00201 TRACE(6, "DataReceiverManager::runReceiver_: Sending receive stats");
00202 auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
00203 metricMan->sendMetric("Data Receive Time From Rank " + std::to_string(source_rank), delta_t, "s", 1);
00204 metricMan->sendMetric("Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() * sizeof(RawDataType)), "B", 1);
00205 metricMan->sendMetric("Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() * sizeof(RawDataType) / delta_t, "B/s", 1);
00206 }
00207
00208 if (stop_requested_) return;
00209
00210 fragment_store_[source_rank].emplace_back(std::move(fragment));
00211 input_cv_.notify_all();
00212
00213 if (endOfData)
00214 {
00215 return;
00216 }
00217 }
00218 }