00001 #include <chrono>
00002
00003 #include "artdaq/DAQrate/DataReceiverManager.hh"
00004 #include "artdaq/DAQdata/Globals.hh"
00005 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
00006
00007 artdaq::DataReceiverManager::DataReceiverManager(const fhicl::ParameterSet& pset)
00008 : stop_requested_(false)
00009 , source_threads_()
00010 , source_plugins_()
00011 , enabled_sources_()
00012 , suppressed_sources_()
00013 , fragment_store_()
00014 , recv_frag_count_()
00015 , recv_frag_size_()
00016 , recv_seq_count_()
00017 , suppress_noisy_senders_(pset.get<bool>("auto_suppression_enabled",true))
00018 , suppression_threshold_(pset.get<size_t>("max_receive_difference", 50))
00019 , receive_timeout_(pset.get<size_t>("receive_timeout_usec", 100000))
00020 {
00021 TLOG_DEBUG("DataReceiverManager") << "Constructor" << TLOG_ENDL;
00022 auto enabled_srcs = pset.get<std::vector<int>>("enabled_sources", std::vector<int>());
00023 auto enabled_srcs_empty = enabled_srcs.size() == 0;
00024 if (enabled_srcs_empty)
00025 {
00026 TLOG_INFO("DataReceiverManager") << "enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
00027 }
00028 else
00029 {
00030 for (auto& s : enabled_srcs)
00031 {
00032 enabled_sources_.insert(s);
00033 }
00034 }
00035
00036 auto srcs = pset.get<fhicl::ParameterSet>("sources", fhicl::ParameterSet());
00037 for (auto& s : srcs.get_pset_names())
00038 {
00039 try
00040 {
00041 auto transfer = std::unique_ptr<TransferInterface>(MakeTransferPlugin(srcs, s,
00042 TransferInterface::Role::kReceive));
00043 auto source_rank = transfer->source_rank();
00044 if (enabled_srcs_empty) enabled_sources_.insert( source_rank );
00045 source_plugins_[ source_rank ] = std::move(transfer);
00046 fragment_store_[ source_rank ];
00047 }
00048 catch (cet::exception ex)
00049 {
00050 TLOG_WARNING("DataReceiverManager") << "cet::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
00051 }
00052 catch (std::exception ex)
00053 {
00054 TLOG_WARNING("DataReceiverManager") << "std::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
00055 }
00056 catch (...)
00057 {
00058 TLOG_WARNING("DataReceiverManager") << "Non-cet exception caught while setting up source " << s << "." << TLOG_ENDL;
00059 }
00060 }
00061 if (srcs.get_pset_names().size() == 0)
00062 {
00063 TLOG_ERROR("DataReceiverManager") << "No sources configured!" << TLOG_ENDL;
00064 }
00065 }
00066
00067 artdaq::DataReceiverManager::~DataReceiverManager()
00068 {
00069 TLOG_DEBUG("DataReceiverManager") << "Destructor" << TLOG_ENDL;
00070 TRACE(5, "~DataReceiverManager: BEGIN: Setting stop_requested to true, frags=%zu, bytes=%zu", count(), byteCount());
00071 stop_requested_ = true;
00072
00073 TRACE(5, "~DataReceiverManager: Notifying all threads");
00074 output_cv_.notify_all();
00075
00076 TRACE(5, "~DataReceiverManager: Joining all threads");
00077 for (auto& s : source_threads_)
00078 {
00079 auto& thread = s.second;
00080 if (thread.joinable()) thread.join();
00081 }
00082 TRACE(5, "~DataReceiverManager: DONE");
00083 }
00084
00085 bool artdaq::DataReceiverManager::fragments_ready_() const
00086 {
00087 return get_next_source_() != -1;
00088 }
00089
00090 int artdaq::DataReceiverManager::get_next_source_() const
00091 {
00092
00093 for (auto& it : fragment_store_)
00094 {
00095 if (!enabled_sources_.count(it.first) || suppressed_sources_.count(it.first)) continue;
00096 if (!it.second.empty()) return it.first;
00097 }
00098 return -1;
00099 }
00100
00101 void artdaq::DataReceiverManager::unsuppressAll()
00102 {
00103 suppressed_sources_.clear();
00104 output_cv_.notify_all();
00105 }
00106
00107 void artdaq::DataReceiverManager::suppress_source(int source)
00108 {
00109 suppressed_sources_.insert(source);
00110 }
00111
00112 void artdaq::DataReceiverManager::reject_fragment(int source_rank, FragmentPtr frag)
00113 {
00114 if (frag == nullptr) return;
00115 suppress_source(source_rank);
00116 fragment_store_[source_rank].emplace_front(std::move(frag));
00117 }
00118
00119 void artdaq::DataReceiverManager::start_threads()
00120 {
00121 for (auto& source : source_plugins_)
00122 {
00123 auto& rank = source.first;
00124 if (enabled_sources_.count(rank))
00125 {
00126 source_threads_[rank] = std::thread(&DataReceiverManager::runReceiver_, this, rank);
00127 }
00128 }
00129 }
00130
00131 artdaq::FragmentPtr artdaq::DataReceiverManager::recvFragment(int& rank, size_t timeout_usec)
00132 {
00133 TRACE(5, "DataReceiverManager::recvFragment entered tmo=%zu us", timeout_usec);
00134
00135 if (timeout_usec == 0) timeout_usec = 1000000;
00136
00137 auto ready = fragments_ready_();
00138 size_t waited = 0;
00139 auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
00140 TRACE(5, "DataReceiverManager::recvFragment fragment_ready_=%d before wait", ready);
00141 while (!ready && waited < timeout_usec)
00142 {
00143 {
00144 std::unique_lock<std::mutex> lck(input_cv_mutex_);
00145 input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
00146 }
00147 waited += wait_amount;
00148 ready = fragments_ready_();
00149 }
00150 TRACE(5, "DataReceiverManager::recvFragment fragment_ready_=%d after waited=%zu", ready, waited);
00151 if (!ready)
00152 {
00153 TRACE(5, "DataReceiverManager::recvFragment: No fragments ready, returning empty");
00154 rank = TransferInterface::RECV_TIMEOUT;
00155 return std::unique_ptr<Fragment>{};
00156 }
00157
00158 int current_source = get_next_source_();
00159 FragmentPtr current_fragment = fragment_store_[current_source].front();
00160 output_cv_.notify_all();
00161 rank = current_source;
00162
00163 if (current_fragment != nullptr)
00164 TRACE(5, "DataReceiverManager::recvFragment: Done rank=%d, fragment size=%zu words, seqId=%zu", rank, current_fragment->size(), current_fragment->sequenceID());
00165 return std::move(current_fragment);
00166 }
00167
00168 void artdaq::DataReceiverManager::runReceiver_(int source_rank)
00169 {
00170 while (!stop_requested_ && enabled_sources_.count(source_rank))
00171 {
00172 TRACE(16, "DataReceiverManager::runReceiver_: Begin loop");
00173 auto is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
00174 while (!stop_requested_ && is_suppressed)
00175 {
00176 TRACE(6, "DataReceiverManager::runReceiver_: Suppressing receiver rank %d", source_rank);
00177 if (!is_suppressed) input_cv_.notify_all();
00178 else
00179 {
00180 std::unique_lock<std::mutex> lck(output_cv_mutex_);
00181 output_cv_.wait_for(lck, std::chrono::seconds(1));
00182 }
00183 is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
00184 }
00185 if (stop_requested_) return;
00186
00187 auto start_time = std::chrono::steady_clock::now();
00188 TRACE(16, "DataReceiverManager::runReceiver_: Calling receiveFragment");
00189 auto fragment = std::unique_ptr<Fragment>(new Fragment());
00190 auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
00191 TRACE(16, "DataReceiverManager::runReceiver_: Done with receiveFragment, ret=%d (should be %d)", ret, source_rank);
00192
00193 if (ret != source_rank) continue;
00194
00195 recv_frag_count_.incSlot(source_rank);
00196 recv_frag_size_.incSlot(source_rank, fragment->size() * sizeof(RawDataType));
00197 recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
00198
00199 bool endOfData = fragment->type() == artdaq::Fragment::EndOfDataFragmentType;
00200
00201 if (metricMan)
00202 {
00203 TRACE(6, "DataReceiverManager::runReceiver_: Sending receive stats");
00204 auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
00205 metricMan->sendMetric("Data Receive Time From Rank " + std::to_string(source_rank), delta_t, "s", 1);
00206 metricMan->sendMetric("Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() * sizeof(RawDataType)), "B", 1);
00207 metricMan->sendMetric("Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() * sizeof(RawDataType) / delta_t, "B/s", 1);
00208 }
00209
00210 if (stop_requested_) return;
00211
00212 fragment_store_[source_rank].emplace_back(std::move(fragment));
00213 input_cv_.notify_all();
00214
00215 if (endOfData)
00216 {
00217 return;
00218 }
00219 }
00220 }