1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_FragmentReceiverManager").c_str()
7 #include "artdaq/DAQrate/FragmentReceiverManager.hh"
8 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
9 #include "cetlib_except/exception.h"
// FragmentReceiverManager constructor: member-initializer list followed by the body.
// NOTE(review): the signature line and every brace-only line are absent from this
// listing (the embedded numbers skip where lines were dropped), so the nesting of
// the statements below is inferred from their order only.
12 : stop_requested_(false)
// Settings read from the ParameterSet `pset`; the second argument of each get<>
// call is the fallback value supplied by this code.
16 , suppress_noisy_senders_(pset.get<bool>(
"auto_suppression_enabled", true))
17 , suppression_threshold_(pset.get<size_t>(
"max_receive_difference", 50))
18 , receive_timeout_(pset.get<size_t>(
"receive_timeout_usec", 100000))
21 TLOG(TLVL_DEBUG + 32) <<
"Constructor";
// Optional list of enabled source ranks; falls back to an empty vector.
22 auto enabled_srcs = pset.get<std::vector<int>>(
"enabled_sources", std::vector<int>());
23 auto enabled_srcs_empty = enabled_srcs.empty();
// An empty list is logged as "all sources enabled".
24 if (enabled_srcs_empty)
26 TLOG(TLVL_INFO) <<
"enabled_sources not specified, assuming all sources enabled.";
// Record every explicitly listed rank as enabled.
30 for (
auto& s : enabled_srcs)
32 enabled_sources_[s] =
true;
// Walk the nested parameter sets found under "sources" (empty set if absent).
36 auto srcs = pset.get<fhicl::ParameterSet>(
"sources", fhicl::ParameterSet());
37 for (
auto& s : srcs.get_pset_names())
// NOTE(review): the statement that creates `transfer` is not present in this
// listing; presumably it is built with MakeTransferPlugin - confirm in the full file.
43 auto source_rank = transfer->source_rank();
// With no explicit list every configured source is enabled; otherwise a rank that
// is not already in the map gets an explicit `false` entry.
// NOTE(review): other functions in this listing test membership with
// enabled_sources_.count(rank) rather than the stored bool, so a `false` entry
// still counts as present there - confirm whether that is intended.
44 if (enabled_srcs_empty)
46 enabled_sources_[source_rank] =
true;
48 else if (enabled_sources_.count(source_rank) == 0u)
50 enabled_sources_[source_rank] =
false;
// Per-source bookkeeping: mark as not running, move the plugin into
// source_plugins_, touch the fragment_store_ entry for this rank, and reset the
// metric timestamp and accumulator pair.
52 running_sources_[source_rank] =
false;
53 source_plugins_[source_rank] = std::move(transfer);
54 fragment_store_[source_rank];
55 source_metric_send_time_[source_rank] = std::chrono::steady_clock::now();
56 source_metric_data_[source_rank] = std::pair<size_t, double>();
// Exceptions raised while setting up a source are reported as warnings that name
// the source.
58 catch (
const cet::exception& ex)
60 TLOG(TLVL_WARNING) <<
"cet::exception caught while setting up source " << s <<
": " << ex.what();
62 catch (
const std::exception& ex)
64 TLOG(TLVL_WARNING) <<
"std::exception caught while setting up source " << s <<
": " << ex.what();
// NOTE(review): the handler line for this message is missing from the listing;
// presumably a catch-all - confirm.
68 TLOG(TLVL_WARNING) <<
"Non-cet exception caught while setting up source " << s <<
".";
// Having no entries under "sources" is reported as an error.
71 if (srcs.get_pset_names().empty())
73 TLOG(TLVL_ERROR) <<
"No sources configured!";
// Destructor body (the signature line is absent from this listing): sets the stop
// flag, wakes waiters on output_cv_, then visits every entry of source_threads_.
79 TLOG(TLVL_DEBUG + 32) <<
"Destructor";
80 TLOG(TLVL_DEBUG + 34) <<
"~FragmentReceiverManager: BEGIN: Setting stop_requested to true, frags=" << count() <<
", bytes=" << byteCount();
// runReceiver_ tests this flag in its loop conditions.
81 stop_requested_ =
true;
83 TLOG(TLVL_DEBUG + 34) <<
"~FragmentReceiverManager: Notifying all threads";
// runReceiver_ waits on output_cv_ while a source is suppressed; wake it so it can
// observe the stop flag.
84 output_cv_.notify_all();
86 TLOG(TLVL_DEBUG + 34) <<
"~FragmentReceiverManager: Joining all threads";
87 for (
auto& s : source_threads_)
89 auto& thread = s.second;
// NOTE(review): the statements guarded by joinable() are missing from this listing;
// presumably the thread is joined here - confirm in the full file.
92 if (thread.joinable())
102 TLOG(TLVL_DEBUG + 34) <<
"~FragmentReceiverManager: DONE";
// Scans fragment_store_ and returns true as soon as one entry is non-empty.
// NOTE(review): the statement guarded by the count() == 0 test and the final return
// are missing from this listing; presumably `continue` and `return false` - confirm.
105 bool artdaq::FragmentReceiverManager::fragments_ready_()
const
107 for (
auto& it : fragment_store_)
// Ranks with no entry at all in enabled_sources_ are singled out here.
109 if (enabled_sources_.count(it.first) == 0u)
113 if (!it.second.empty()) {
return true; }
// Picks the source rank to read from next.
// Ranks whose fragment_store_ entry is non-empty are collected into an ordered
// std::set; the choice is the element following last_source_, wrapping around to
// the first element when last_source_ is not in the set or was its last element.
// The chosen rank is written back to last_source_.
// NOTE(review): this function is const yet assigns last_source_, so that member is
// presumably declared mutable. The return statements after the second and third
// log lines are missing from this listing; presumably `return *iter` and
// `return -1` - confirm.
118 int artdaq::FragmentReceiverManager::get_next_source_()
const
121 std::set<int> ready_sources;
122 for (
auto& it : fragment_store_)
// Ranks with no entry at all in enabled_sources_ are singled out here (the guarded
// statement is missing from the listing).
124 if (enabled_sources_.count(it.first) == 0u)
128 if (!it.second.empty())
130 ready_sources.insert(it.first);
134 if (!ready_sources.empty())
136 auto iter = ready_sources.find(last_source_);
// Wrap to the lowest ready rank when there is no successor of last_source_.
137 if (iter == ready_sources.end() || ++iter == ready_sources.end())
139 TLOG(TLVL_DEBUG + 35) <<
"get_next_source returning " << *ready_sources.begin();
140 last_source_ = *ready_sources.begin();
141 return *ready_sources.begin();
// Otherwise `iter` was already advanced by the test above and names the successor.
144 TLOG(TLVL_DEBUG + 35) <<
"get_next_source returning " << *iter;
145 last_source_ = *iter;
149 TLOG(TLVL_DEBUG + 35) <<
"get_next_source returning -1";
// Body of the routine that launches one receiver thread per source plugin
// (presumably start_threads(); its signature line is absent from this listing).
155 for (
auto& source : source_plugins_)
157 auto& rank = source.first;
// Only ranks that have an entry in enabled_sources_ are started.
// NOTE(review): this tests presence of the key, not the stored bool - confirm
// that is intended.
158 if (enabled_sources_.count(rank) != 0u)
160 running_sources_[rank] =
true;
// The new thread runs runReceiver_(rank) on this object.
163 source_threads_[rank] = boost::thread(&FragmentReceiverManager::runReceiver_,
this, rank);
// Label the thread "<rank>-<my_rank> FRecv". The last byte of the buffer is set to
// NUL explicitly after formatting.
// NOTE(review): the declaration of `tname` is missing from this listing, so its
// size cannot be checked here.
165 snprintf(tname,
sizeof(tname) - 1,
"%d-%d FRecv", rank, my_rank);
166 tname[
sizeof(tname) - 1] =
'\0';
167 auto handle = source_threads_[rank].native_handle();
168 pthread_setname_np(handle, tname);
// A boost::exception raised while starting the thread is reported both through
// TLOG and on std::cerr, including the current errno.
170 catch (
const boost::exception& e)
172 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Receiver " << rank <<
" thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
173 std::cerr <<
"Caught boost::exception starting Receiver " << rank <<
" thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
// Body of the fragment-retrieval routine (presumably recvFragment(rank,
// timeout_usec); its signature line is absent from this listing).
// Waits until some source has a stored fragment or the timeout elapses, then hands
// back the front fragment of the source chosen by get_next_source_() and writes
// that source's rank to `rank`.
182 TLOG(TLVL_DEBUG + 34) <<
"recvFragment entered tmo=" << timeout_usec <<
" us";
// A timeout of 0 is replaced by 1,000,000 us.
184 if (timeout_usec == 0)
186 timeout_usec = 1000000;
189 auto ready = fragments_ready_();
// Each wait lasts timeout_usec / 1000, but never less than 1000; the value is used
// as microseconds below.
// NOTE(review): the declaration of `waited` is missing from this listing.
191 auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
192 TLOG(TLVL_DEBUG + 34) <<
"recvFragment fragment_ready_=" << ready <<
" before wait";
193 while (!ready && waited < timeout_usec)
// runReceiver_ notifies input_cv_ after storing a fragment.
196 std::unique_lock<std::mutex> lck(input_cv_mutex_);
197 input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
// `waited` is advanced by the full slice even if the wait returned early.
199 waited += wait_amount;
200 ready = fragments_ready_();
// NOTE(review): the statement guarded by this test is missing from the listing;
// presumably the wait loop is abandoned once no source is running - confirm.
201 if (running_sources().empty())
206 TLOG(TLVL_DEBUG + 34) <<
"recvFragment fragment_ready_=" << ready <<
" after waited=" << waited;
// NOTE(review): the condition guarding this early return is missing from the
// listing; presumably `!ready` - confirm.
209 TLOG(TLVL_DEBUG + 34) <<
"recvFragment: No fragments ready, returning empty";
211 return std::unique_ptr<Fragment>{};
214 int current_source = get_next_source_();
215 FragmentPtr current_fragment = fragment_store_[current_source].front();
// runReceiver_ waits on output_cv_ while a source is suppressed.
216 output_cv_.notify_all();
217 rank = current_source;
// The log line dereferences the fragment, so it is only emitted for a non-null one.
219 if (current_fragment !=
nullptr)
221 TLOG(TLVL_DEBUG + 34) <<
"recvFragment: Done rank=" << rank <<
", fragment size=" << std::to_string(current_fragment->size()) <<
" words, seqId=" << current_fragment->sequenceID();
223 return current_fragment;
// Builds a std::set<int> of ranks taken from the keys of running_sources_
// (presumably the body of running_sources(); the signature line is absent).
// NOTE(review): the lines between the loop header and the insert, and the return,
// are missing from this listing, so any filter on the mapped bool is not visible
// here - confirm in the full file.
228 std::set<int> output;
229 for (
auto& src : running_sources_)
233 output.insert(src.first);
// Builds a std::set<int> of ranks taken from the keys of enabled_sources_
// (presumably the body of enabled_sources(); the signature line is absent).
// NOTE(review): the lines between the loop header and the insert, and the return,
// are missing from this listing, so any filter on the mapped bool is not visible
// here - confirm in the full file.
241 std::set<int> output;
242 for (
auto& src : enabled_sources_)
246 output.insert(src.first);
// Receive loop for a single source; start-up code in this file launches it as the
// entry point of a boost::thread, one per rank.
// Each pass: optionally hold off while this source is too far ahead of the others,
// check the end-of-data condition, receive one fragment from the source's transfer
// plugin, update counters and metrics, store the fragment, and notify input_cv_.
// NOTE(review): brace-only lines and several control statements are missing from
// this listing, so nesting and early exits are inferred from statement order.
252 void artdaq::FragmentReceiverManager::runReceiver_(
int source_rank)
// Runs until a stop is requested or source_rank has no entry in enabled_sources_.
254 while (!stop_requested_ && (enabled_sources_.count(source_rank) != 0u))
256 TLOG(TLVL_DEBUG + 36) <<
"runReceiver_ " << source_rank <<
": Begin loop";
// Suppressed when auto-suppression is on and this source's recv_seq_count_ slot
// exceeds the minimum count by more than suppression_threshold_.
257 auto is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
258 while (!stop_requested_ && is_suppressed)
260 TLOG(TLVL_DEBUG + 37) <<
"runReceiver_: Suppressing receiver rank " << source_rank;
// Wake the consumer side, then wait up to one second on output_cv_ before
// re-evaluating the suppression condition.
263 input_cv_.notify_all();
267 std::unique_lock<std::mutex> lck(output_cv_mutex_);
268 output_cv_.wait_for(lck, std::chrono::seconds(1));
270 is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
// NOTE(review): the condition guarding this assignment is missing from the
// listing; presumably it handles a stop request - confirm.
274 running_sources_[source_rank] =
false;
// End of data: the expected fragment count has been reached and the plugin reports
// that it is no longer running.
278 if (fragment_store_[source_rank].GetEndOfData() <= recv_frag_count_.slotCount(source_rank) && !source_plugins_[source_rank]->isRunning())
280 TLOG(TLVL_DEBUG + 32) <<
"runReceiver_: EndOfData conditions satisfied, ending receive loop";
281 running_sources_[source_rank] =
false;
// Timestamp used for the receive-time metric below.
285 auto start_time = std::chrono::steady_clock::now();
286 TLOG(TLVL_DEBUG + 36) <<
"runReceiver_: Calling receiveFragment";
287 auto fragment = std::make_unique<Fragment>();
// NOTE(review): a whole-fragment receive and a header-then-data receive appear back
// to back; the embedded numbers skip lines between them, so presumably a
// preprocessor conditional selects one path - confirm in the full file.
289 auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
290 TLOG(TLVL_DEBUG + 36) <<
"runReceiver_: Done with receiveFragment, ret=" << ret <<
" (should be " << source_rank <<
")";
// A return value other than this source's rank restarts the loop.
291 if (ret != source_rank)
continue;
293 artdaq::detail::RawFragmentHeader hdr;
294 auto ret1 = source_plugins_[source_rank]->receiveFragmentHeader(hdr, receive_timeout_);
295 TLOG(TLVL_DEBUG + 36) <<
"runReceiver_: Done with receiveFragmentHeader, ret1=" << ret1 <<
" (should be " << source_rank <<
")";
// NOTE(review): the statement guarded by this test is missing from the listing.
297 if (ret1 != source_rank)
// Size the fragment from the header's word count less the header words, copy the
// header into place, then receive the remaining words directly after it.
302 fragment->resize(hdr.word_count - hdr.num_words());
303 memcpy(fragment->headerAddress(), &hdr, hdr.num_words() *
sizeof(artdaq::RawDataType));
304 auto ret2 = source_plugins_[source_rank]->receiveFragmentData(fragment->headerAddress() + hdr.num_words(), hdr.word_count - hdr.num_words());
// NOTE(review): the condition guarding this error log is missing from the listing;
// presumably a mismatch between ret1 and ret2 - confirm.
307 TLOG(TLVL_ERROR) <<
"ReceiveFragmentHeader returned " << ret1 <<
", but ReceiveFragmentData returned " << ret2;
// An EndOfData fragment's payload is read as a size_t and recorded with
// SetEndOfData for this source.
312 if (fragment->type() == artdaq::Fragment::EndOfDataFragmentType)
314 TLOG(TLVL_DEBUG + 33) <<
"runReceiver_: EndOfData Fragment received!";
315 fragment_store_[source_rank].SetEndOfData(*reinterpret_cast<size_t*>(fragment->dataBegin()));
// Data, container and user-typed fragments update the count, byte-size and
// sequence-ID trackers for this source.
317 else if (fragment->type() == artdaq::Fragment::DataFragmentType || fragment->type() == artdaq::Fragment::ContainerFragmentType || fragment->isUserFragmentType(fragment->type()))
319 TLOG(TLVL_DEBUG + 33) <<
"runReceiver_: Data Fragment received!";
320 recv_frag_count_.incSlot(source_rank);
321 recv_frag_size_.incSlot(source_rank, fragment->size() *
sizeof(RawDataType));
322 recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
// Accumulate bytes received and elapsed receive time (as double seconds).
329 auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
330 source_metric_data_[source_rank].first += fragment->size() *
sizeof(RawDataType);
331 source_metric_data_[source_rank].second += delta_t;
// Report metrics when metricMan is set and GetElapsedTime since the last report
// exceeds 1 (presumably seconds - confirm), then reset the accumulators.
333 if (metricMan && TimeUtils::GetElapsedTime(source_metric_send_time_[source_rank]) > 1)
335 TLOG(TLVL_DEBUG + 37) <<
"runReceiver_: Sending receive stats";
336 metricMan->sendMetric(
"Data Receive Time From Rank " + std::to_string(source_rank), source_metric_data_[source_rank].second,
"s", 1, MetricMode::Accumulate);
337 metricMan->sendMetric(
"Data Receive Size From Rank " + std::to_string(source_rank), source_metric_data_[source_rank].first,
"B", 1, MetricMode::Accumulate);
338 metricMan->sendMetric(
"Data Receive Rate From Rank " + std::to_string(source_rank), source_metric_data_[source_rank].first / source_metric_data_[source_rank].second,
"B/s", 1, MetricMode::Average);
340 source_metric_send_time_[source_rank] = std::chrono::steady_clock::now();
341 source_metric_data_[source_rank].first = 0;
342 source_metric_data_[source_rank].second = 0.0;
// Hand the fragment to this source's store and wake waiters on input_cv_.
345 fragment_store_[source_rank].emplace_back(std::move(fragment));
346 TLOG(TLVL_DEBUG + 33) <<
"runReceiver_: There are now " << fragment_store_[source_rank].size() <<
" Fragments stored from this source";
347 input_cv_.notify_all();
// Last statement of the function: mark this source as no longer running
// (presumably reached once the outer loop exits).
350 running_sources_[source_rank] =
false;
void start_threads()
Start receiver threads for all enabled sources.
std::set< int > enabled_sources() const
Get the list of enabled sources.
virtual ~FragmentReceiverManager()
FragmentReceiverManager Destructor.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, const std::string &plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
This TransferInterface is a Receiver.
FragmentReceiverManager(const fhicl::ParameterSet &ps)
FragmentReceiverManager Constructor.
FragmentPtr recvFragment(int &rank, size_t timeout_usec=0)
Receive a Fragment.
std::set< int > running_sources() const
Get the list of sources which are still receiving data.
Value to be returned upon receive timeout.