3 #include "proto/FragmentReceiverManager.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
6 #include "cetlib_except/exception.h"
// Constructor body of FragmentReceiverManager (the signature line is not part of this listing).
// Reads its settings from the ParameterSet `pset`, records which source ranks are enabled,
// and stores one transfer plugin plus one fragment_store_ entry per configured source.
9 : stop_requested_(false)
// "auto_suppression_enabled" (default true) switches noisy-sender suppression on or off.
17 , suppress_noisy_senders_(pset.get<bool>(
"auto_suppression_enabled", true))
// "max_receive_difference" (default 50) is the threshold used in the suppression test in runReceiver_.
18 , suppression_threshold_(pset.get<size_t>(
"max_receive_difference", 50))
// "receive_timeout_usec" (default 100000) is passed to the plugin receive calls.
19 , receive_timeout_(pset.get<size_t>(
"receive_timeout_usec", 100000))
21 TLOG_DEBUG(
"FragmentReceiverManager") <<
"Constructor" << TLOG_ENDL;
// "enabled_sources" is optional; an empty list means every configured source is treated as enabled.
22 auto enabled_srcs = pset.get<std::vector<int>>(
"enabled_sources", std::vector<int>());
23 auto enabled_srcs_empty = enabled_srcs.size() == 0;
24 if (enabled_srcs_empty)
26 TLOG_INFO(
"FragmentReceiverManager") <<
"enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
// Copy the explicitly listed ranks into enabled_sources_.
30 for (
auto& s : enabled_srcs)
32 enabled_sources_.insert(s);
// Each entry of the "sources" ParameterSet names one source to set up.
36 auto srcs = pset.get<fhicl::ParameterSet>(
"sources", fhicl::ParameterSet());
37 for (
auto& s : srcs.get_pset_names())
// NOTE(review): the statement that creates `transfer` is not part of this listing.
43 auto source_rank = transfer->source_rank();
// With no explicit list, every source that was set up successfully becomes enabled.
44 if (enabled_srcs_empty) enabled_sources_.insert(source_rank);
45 source_plugins_[source_rank] = std::move(transfer);
// Evaluated only for its side effect; presumably operator[] creates the (empty) store
// entry for this rank — verify against the type of fragment_store_.
46 fragment_store_[source_rank];
// Setup failures are logged as warnings and the source is skipped.
// NOTE(review): both handlers catch by value, which copies the exception and slices
// derived types (so what() may lose detail); consider catching by const reference.
48 catch (cet::exception ex)
50 TLOG_WARNING(
"FragmentReceiverManager") <<
"cet::exception caught while setting up source " << s <<
": " << ex.what() << TLOG_ENDL;
52 catch (std::exception ex)
54 TLOG_WARNING(
"FragmentReceiverManager") <<
"std::exception caught while setting up source " << s <<
": " << ex.what() << TLOG_ENDL;
// Presumably the body of a catch-all handler whose `catch` line is not part of this listing.
58 TLOG_WARNING(
"FragmentReceiverManager") <<
"Non-cet exception caught while setting up source " << s <<
"." << TLOG_ENDL;
// An empty "sources" block is reported as an error.
61 if (srcs.get_pset_names().size() == 0)
63 TLOG_ERROR(
"FragmentReceiverManager") <<
"No sources configured!" << TLOG_ENDL;
// Destructor body of FragmentReceiverManager (the signature line is not part of this listing).
// Requests a stop, wakes threads waiting on output_cv_, then joins every receiver thread.
69 TLOG_DEBUG(
"FragmentReceiverManager") <<
"Destructor" << TLOG_ENDL;
70 TLOG_ARB(5,
"FragmentReceiverManager") <<
"~FragmentReceiverManager: BEGIN: Setting stop_requested to true, frags=" << std::to_string(count()) <<
", bytes=" << std::to_string(byteCount()) << TLOG_ENDL;
// runReceiver_ checks this flag in its loop conditions and returns once it is set.
71 stop_requested_ =
true;
73 TLOG_ARB(5,
"FragmentReceiverManager") <<
"~FragmentReceiverManager: Notifying all threads" << TLOG_ENDL;
// Suppressed receivers wait on output_cv_; waking them lets them observe the stop flag
// without sitting out their full wait.
74 output_cv_.notify_all();
76 TLOG_ARB(5,
"FragmentReceiverManager") <<
"~FragmentReceiverManager: Joining all threads" << TLOG_ENDL;
// Join only threads that are joinable.
77 for (
auto& s : source_threads_)
79 auto& thread = s.second;
80 if (thread.joinable()) thread.join();
82 TLOG_ARB(5,
"FragmentReceiverManager") <<
"~FragmentReceiverManager: DONE" << TLOG_ENDL;
85 bool artdaq::FragmentReceiverManager::fragments_ready_()
const
87 return get_next_source_() != -1;
90 int artdaq::FragmentReceiverManager::get_next_source_()
const
93 for (
auto& it : fragment_store_)
95 if (!enabled_sources_.count(it.first))
continue;
96 if (!it.second.empty())
return it.first;
// Body of start_threads() (the signature line is not part of this listing).
// Launches one receiver thread per configured source whose rank is enabled.
103 for (
auto& source : source_plugins_)
105 auto& rank = source.first;
// Sources that are configured but not enabled get no thread.
106 if (enabled_sources_.count(rank))
// Each thread runs runReceiver_(rank) on this object; the handle is kept in source_threads_,
// which the destructor iterates to join the threads.
108 source_threads_[rank] = boost::thread(&FragmentReceiverManager::runReceiver_,
this, rank);
// Body of recvFragment(int& rank, size_t timeout_usec) (the signature line is not part of this listing).
// Waits until some enabled source has a fragment or the timeout elapses, then hands back
// the front fragment of the first ready source and reports that source through `rank`.
115 TLOG_ARB(5,
"FragmentReceiverManager") <<
"recvFragment entered tmo=" << std::to_string(timeout_usec) <<
" us" << TLOG_ENDL;
// A timeout of 0 is replaced by 1000000 us.
117 if (timeout_usec == 0) timeout_usec = 1000000;
119 auto ready = fragments_ready_();
// Wait step: timeout_usec / 1000, but never less than 1000 (used as microseconds below).
121 auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
122 TLOG_ARB(5,
"FragmentReceiverManager") <<
"recvFragment fragment_ready_=" << ready <<
" before wait" << TLOG_ENDL;
// NOTE(review): the declaration of `waited` is not part of this listing.
123 while (!ready && waited < timeout_usec)
// Wait one step on input_cv_ (runReceiver_ notifies it after queueing a fragment),
// then re-check; `waited` advances by the nominal step even if woken early.
126 std::unique_lock<std::mutex> lck(input_cv_mutex_);
127 input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
129 waited += wait_amount;
130 ready = fragments_ready_();
132 TLOG_ARB(5,
"FragmentReceiverManager") <<
"recvFragment fragment_ready_=" << ready <<
" after waited=" << std::to_string( waited) << TLOG_ENDL;
// Nothing-ready path: returns an empty pointer (the guarding condition is not part of this listing).
135 TLOG_ARB(5,
"FragmentReceiverManager") <<
"recvFragment: No fragments ready, returning empty" << TLOG_ENDL;
137 return std::unique_ptr<Fragment>{};
140 int current_source = get_next_source_();
// presumably front() also removes the fragment from the store — verify against the store type.
141 FragmentPtr current_fragment = fragment_store_[current_source].front();
// Wake receivers waiting on output_cv_ (suppressed receivers in runReceiver_ wait on it).
142 output_cv_.notify_all();
143 rank = current_source;
145 if (current_fragment !=
nullptr)
146 TLOG_ARB(5,
"FragmentReceiverManager") <<
"recvFragment: Done rank="<< rank <<
", fragment size="<<std::to_string(current_fragment->size()) <<
" words, seqId=" << std::to_string( current_fragment->sequenceID()) << TLOG_ENDL;
// NOTE(review): std::move on a returned local is redundant and inhibits copy elision;
// `return current_fragment;` should suffice — confirm FragmentPtr matches the return type.
147 return std::move(current_fragment);
// Receive loop for a single source rank (the listing ends before the function does).
// Each pass optionally waits out suppression, receives a fragment through the rank's
// transfer plugin, updates statistics and metrics, and queues the fragment in fragment_store_.
150 void artdaq::FragmentReceiverManager::runReceiver_(
int source_rank)
// Runs until a stop is requested or the rank is no longer in enabled_sources_.
152 while (!stop_requested_ && enabled_sources_.count(source_rank))
154 TLOG_ARB(16,
"FragmentReceiverManager") <<
"runReceiver_ "<< source_rank <<
": Begin loop" << TLOG_ENDL;
// Suppressed when suppression is enabled and this rank's recv_seq_count_ slot exceeds
// the minimum slot by more than suppression_threshold_.
155 auto is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
156 while (!stop_requested_ && is_suppressed)
158 TLOG_ARB(6,
"FragmentReceiverManager") <<
"runReceiver_: Suppressing receiver rank " << source_rank << TLOG_ENDL;
// NOTE(review): as listed, this runs only while is_suppressed is true, so the
// condition looks unreachable — confirm against the full source.
159 if (!is_suppressed) input_cv_.notify_all();
// Wait up to one second on output_cv_, then re-evaluate the suppression test.
162 std::unique_lock<std::mutex> lck(output_cv_mutex_);
163 output_cv_.wait_for(lck, std::chrono::seconds(1));
165 is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
167 if (stop_requested_)
return;
// start_time feeds the receive-time and receive-rate metrics below.
169 auto start_time = std::chrono::steady_clock::now();
170 TLOG_ARB(16,
"FragmentReceiverManager") <<
"runReceiver_: Calling receiveFragment" << TLOG_ENDL;
171 auto fragment = std::unique_ptr<Fragment>(
new Fragment());
// Any return value other than this rank restarts the loop.
173 auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
174 TLOG_ARB(16,
"FragmentReceiverManager") <<
"runReceiver_: Done with receiveFragment, ret=" << ret <<
" (should be " << source_rank <<
")" << TLOG_ENDL;
175 if (ret != source_rank)
continue;
// NOTE(review): the listing shows a whole-fragment receive above followed by a separate
// header + data receive here; presumably only one path is active in the real file — verify.
177 artdaq::detail::RawFragmentHeader hdr;
178 auto ret1 = source_plugins_[source_rank]->receiveFragmentHeader(hdr, receive_timeout_);
179 TLOG_ARB(16,
"FragmentReceiverManager") <<
"runReceiver_: Done with receiveFragmentHeader, ret1=" << ret1 <<
" (should be " << source_rank <<
")" << TLOG_ENDL;
181 if (ret1 != source_rank)
continue;
// Size the fragment from the header's word counts, copy the header into place,
// then receive the remaining words directly after it.
183 fragment->resize(hdr.word_count - hdr.num_words());
184 memcpy(fragment->headerAddress(), &hdr, hdr.num_words() *
sizeof(artdaq::RawDataType));
185 auto ret2 = source_plugins_[source_rank]->receiveFragmentData(fragment->headerAddress() + hdr.num_words(), hdr.word_count - hdr.num_words());
// presumably logged when ret2 differs from ret1; the guarding condition is not part of this listing.
188 TLOG_ERROR(
"FragmentReceiverManager") <<
"ReceiveFragmentHeader returned " << ret1 <<
", but ReceiveFragmentData returned " << ret2 << TLOG_ENDL;
// EndOfData fragments: the size_t read from dataBegin() is handed to SetEndOfData for this rank.
194 if (fragment->type() == artdaq::Fragment::EndOfDataFragmentType)
196 fragment_store_[source_rank].SetEndOfData(*reinterpret_cast<size_t*>(fragment->dataBegin()));
// Data, Container and user-type fragments update the per-rank count, byte and sequence-ID statistics.
198 else if(fragment->type() == artdaq::Fragment::DataFragmentType || fragment->type() == artdaq::Fragment::ContainerFragmentType || fragment->isUserFragmentType(fragment->type()))
200 recv_frag_count_.incSlot(source_rank);
201 recv_frag_size_.incSlot(source_rank, fragment->size() *
sizeof(RawDataType));
202 recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
213 TLOG_ARB(6,
"FragmentReceiverManager") <<
"runReceiver_: Sending receive stats" << TLOG_ENDL;
// delta_t is the time elapsed since start_time, as a double number of seconds.
214 auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
215 metricMan->sendMetric(
"Data Receive Time From Rank " + std::to_string(source_rank), delta_t,
"s", 1, MetricMode::Accumulate);
216 metricMan->sendMetric(
"Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() *
sizeof(RawDataType)),
"B", 1, MetricMode::Accumulate);
217 metricMan->sendMetric(
"Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() *
sizeof(RawDataType) / delta_t,
"B/s", 1, MetricMode::Average);
// Ownership moves into the store; `fragment` must not be dereferenced after this line.
221 fragment_store_[source_rank].emplace_back(std::move(fragment));
// Wake recvFragment callers waiting on input_cv_.
222 input_cv_.notify_all();
// Compares the recorded end-of-data value with the fragment count for this rank;
// the action taken when it holds is not part of this listing.
224 if (fragment_store_[source_rank].GetEndOfData() <= recv_frag_count_.slotCount(source_rank))
void start_threads()
Start receiver threads for all enabled sources.
virtual ~FragmentReceiverManager()
FragmentReceiverManager Destructor.
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
This TransferInterface is a Receiver.
FragmentReceiverManager(const fhicl::ParameterSet &ps)
FragmentReceiverManager Constructor.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
FragmentPtr recvFragment(int &rank, size_t timeout_usec=0)
Receive a Fragment.