3 #include "artdaq/DAQrate/DataReceiverManager.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
// DataReceiverManager constructor (fragment: the signature line, the brace lines and some
// initializer/body lines are not part of this listing).
// Reads the receiver settings from `pset`, records which source ranks are enabled, and logs
// any exception raised while an individual source is being set up.
8 : stop_requested_(false)
12 , suppressed_sources_()
// Settings read from `pset`; the second argument of get<>() is the value passed as fallback.
17 , suppress_noisy_senders_(pset.get<bool>(
"auto_suppression_enabled",true))
18 , suppression_threshold_(pset.get<size_t>(
"max_receive_difference", 50))
19 , receive_timeout_(pset.get<size_t>(
"receive_timeout_usec", 100000))
21 TLOG_DEBUG(
"DataReceiverManager") <<
"Constructor" << TLOG_ENDL;
// An empty "enabled_sources" list means "enable every configured source" (see the insert of
// `ss` below, which only happens when the list was empty).
22 auto enabled_srcs = pset.get<std::vector<int>>(
"enabled_sources", std::vector<int>());
23 auto enabled_srcs_empty = enabled_srcs.size() == 0;
24 if (enabled_srcs_empty)
26 TLOG_INFO(
"DataReceiverManager") <<
"enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
// Explicitly listed ranks are copied into enabled_sources_.
30 for (
auto& s : enabled_srcs)
32 enabled_sources_.insert(s);
// Walk every named sub-table of "sources"; each one supplies its rank via "source_rank".
36 auto srcs = pset.get<fhicl::ParameterSet>(
"sources", fhicl::ParameterSet());
37 for (
auto& s : srcs.get_pset_names())
41 auto ss = srcs.get<fhicl::ParameterSet>(s).get<int>(
"source_rank");
42 if (enabled_srcs_empty) enabled_sources_.insert(ss);
// Exceptions are caught by const reference: catching by value copies the exception object and
// slices derived types, so what() could report the base-class text instead of the real message.
46 catch (const cet::exception& ex)
48 TLOG_WARNING(
"DataReceiverManager") <<
"cet::exception caught while setting up source " << s <<
": " << ex.what() << TLOG_ENDL;
50 catch (const std::exception& ex)
52 TLOG_WARNING(
"DataReceiverManager") <<
"std::exception caught while setting up source " << s <<
": " << ex.what() << TLOG_ENDL;
// Handler for anything else (its catch line is not visible in this listing).
56 TLOG_WARNING(
"DataReceiverManager") <<
"Non-cet exception caught while setting up source " << s <<
"." << TLOG_ENDL;
// A configuration without any source sub-table is reported as an error.
59 if (srcs.get_pset_names().size() == 0)
61 TLOG_ERROR(
"DataReceiverManager") <<
"No sources configured!" << TLOG_ENDL;
// DataReceiverManager destructor (fragment: the signature and brace lines are not part of this
// listing). Requests that the receiver threads stop, wakes any thread waiting on output_cv_,
// and joins every thread stored in source_threads_.
67 TLOG_DEBUG(
"DataReceiverManager") <<
"Destructor" << TLOG_ENDL;
68 TRACE(5,
"~DataReceiverManager: BEGIN: Setting stop_requested to true, frags=%zu, bytes=%zu", count(), byteCount());
// runReceiver_ checks this flag in its loop conditions and returns once it is set.
69 stop_requested_ =
true;
71 TRACE(5,
"~DataReceiverManager: Notifying all threads");
// Suppressed receivers wait on output_cv_; notify so they re-check stop_requested_.
72 output_cv_.notify_all();
74 TRACE(5,
"~DataReceiverManager: Joining all threads");
// Join each receiver thread that is still joinable.
75 for (
auto& s : source_threads_)
77 auto& thread = s.second;
78 if (thread.joinable()) thread.join();
80 TRACE(5,
"~DataReceiverManager: DONE");
83 bool artdaq::DataReceiverManager::fragments_ready_()
const
85 return get_next_source_() != -1;
// Returns the key of the first fragment_store_ entry that is enabled, not suppressed and
// non-empty.
// NOTE(review): the fall-through return is not visible in this listing; fragments_ready_()
// compares the result against -1, so presumably -1 is returned when nothing qualifies - confirm.
88 int artdaq::DataReceiverManager::get_next_source_()
const
91 for (
auto& it : fragment_store_)
// Skip sources that are not enabled or that are currently suppressed.
93 if (!enabled_sources_.count(it.first) || suppressed_sources_.count(it.first))
continue;
// The first eligible source holding at least one Fragment wins.
94 if (!it.second.empty())
return it.first;
// Body of what appears to be unsuppressAll() (signature line not visible here - confirm):
// empties the set of suppressed sources and wakes threads waiting on output_cv_, which is the
// condition variable suppressed receivers wait on in runReceiver_.
101 suppressed_sources_.clear();
102 output_cv_.notify_all();
// Body of what appears to be suppress_source(int source) (signature line not visible here -
// confirm): adds `source` to the set of suppressed sources.
107 suppressed_sources_.insert(source);
// Body of what appears to be reject_fragment(int source_rank, FragmentPtr frag) (signature line
// not visible here - confirm). A null `frag` is ignored; otherwise the source is suppressed and
// the Fragment is put back at the front of that source's entry in fragment_store_.
112 if (frag ==
nullptr)
return;
113 suppress_source(source_rank);
// emplace_front: presumably so the rejected Fragment is the next one returned for this source.
114 fragment_store_[source_rank].emplace_front(std::move(frag));
// Body of what appears to be start_threads() (signature line not visible here - confirm):
// for every entry in source_plugins_ whose rank is in enabled_sources_, starts a std::thread
// running runReceiver_(rank) and stores it in source_threads_ under that rank.
119 for (
auto& source : source_plugins_)
121 auto& rank = source.first;
122 if (enabled_sources_.count(rank))
124 source_threads_[rank] = std::thread(&DataReceiverManager::runReceiver_,
this, rank);
// recvFragment body (fragment: the signature, the brace lines and some short statements - for
// example the declaration of `waited` and the test guarding the "No fragments ready" branch -
// are not part of this listing).
// Waits up to `timeout_usec` microseconds for a Fragment to become available, then returns it
// and reports the rank it came from through `rank`; returns an empty pointer when nothing
// became ready.
131 TRACE(5,
"DataReceiverManager::recvFragment entered tmo=%zu us", timeout_usec);
// A timeout of 0 is replaced by 1000000 us.
133 if (timeout_usec == 0) timeout_usec = 1000000;
135 auto ready = fragments_ready_();
// Length of one wait slice, in microseconds: a thousandth of the timeout, but at least 1000.
137 auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
138 TRACE(5,
"DataReceiverManager::recvFragment fragment_ready_=%d before wait", ready);
// Wait on input_cv_ (notified by runReceiver_ after it stores a Fragment) one slice at a time
// until a Fragment is ready or the accumulated wait reaches the timeout.
139 while (!ready && waited < timeout_usec)
142 std::unique_lock<std::mutex> lck(input_cv_mutex_);
143 input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
145 waited += wait_amount;
146 ready = fragments_ready_();
148 TRACE(5,
"DataReceiverManager::recvFragment fragment_ready_=%d after waited=%zu", ready, waited);
// Nothing-ready path: hand back an empty pointer.
151 TRACE(5,
"DataReceiverManager::recvFragment: No fragments ready, returning empty");
153 return std::unique_ptr<Fragment>{};
// Take the front Fragment of the chosen source, wake threads waiting on output_cv_, and report
// the source through the `rank` out-parameter.
156 int current_source = get_next_source_();
157 FragmentPtr current_fragment = fragment_store_[current_source].front();
158 output_cv_.notify_all();
159 rank = current_source;
161 if (current_fragment !=
nullptr)
162 TRACE(5,
"DataReceiverManager::recvFragment: Done rank=%d, fragment size=%zu words, seqId=%zu", rank, current_fragment->size(), current_fragment->sequenceID());
// Returned by name: a local of the return type is moved implicitly, and an explicit std::move
// here would only prevent copy elision.
163 return current_fragment;
// Receiver loop for one source rank; start_threads() runs it on its own std::thread.
// While no stop is requested and the rank is enabled it repeatedly: waits while the source is
// suppressed, asks the rank's transfer plugin for a Fragment, updates the receive statistics
// and metrics, appends the Fragment to fragment_store_ and notifies input_cv_.
// NOTE(review): brace lines and some statements (e.g. whatever consumes `endOfData`) are not
// part of this listing.
166 void artdaq::DataReceiverManager::runReceiver_(
int source_rank)
168 while (!stop_requested_ && enabled_sources_.count(source_rank))
170 TRACE(16,
"DataReceiverManager::runReceiver_: Begin loop");
// Suppressed when auto-suppression is on and this rank's recv_seq_count_ slot exceeds the
// minimum slot by more than suppression_threshold_, or when the rank is in suppressed_sources_.
171 auto is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
172 while (!stop_requested_ && is_suppressed)
174 TRACE(6,
"DataReceiverManager::runReceiver_: Suppressing receiver rank %d", source_rank);
// NOTE(review): as far as this listing shows, is_suppressed is not modified between the loop
// test above and this check, so the notify below looks unreachable - confirm against the
// complete source.
175 if (!is_suppressed) input_cv_.notify_all();
// Wait on output_cv_ for up to one second, then re-evaluate the suppression state.
178 std::unique_lock<std::mutex> lck(output_cv_mutex_);
179 output_cv_.wait_for(lck, std::chrono::seconds(1));
181 is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
183 if (stop_requested_)
return;
// start_time is the reference point for the receive-time metric computed below.
185 auto start_time = std::chrono::steady_clock::now();
186 TRACE(16,
"DataReceiverManager::runReceiver_: Calling receiveFragment");
187 auto fragment = std::unique_ptr<Fragment>(
new Fragment());
188 auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
189 TRACE(16,
"DataReceiverManager::runReceiver_: Done with receiveFragment, ret=%d (should be %d)", ret, source_rank);
// Any return value other than this rank restarts the loop without storing the Fragment.
191 if (ret != source_rank)
continue;
// Per-rank bookkeeping: Fragment count, byte count and the Fragment's sequence ID.
193 recv_frag_count_.incSlot(source_rank);
194 recv_frag_size_.incSlot(source_rank, fragment->size() *
sizeof(RawDataType));
195 recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
197 bool endOfData = fragment->type() == artdaq::Fragment::EndOfDataFragmentType;
201 TRACE(6,
"DataReceiverManager::runReceiver_: Sending receive stats");
// delta_t is the elapsed time since start_time, in seconds as a double.
202 auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
203 metricMan->sendMetric(
"Data Receive Time From Rank " + std::to_string(source_rank), delta_t,
"s", 1);
204 metricMan->sendMetric(
"Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() *
sizeof(RawDataType)),
"B", 1);
205 metricMan->sendMetric(
"Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() *
sizeof(RawDataType) / delta_t,
"B/s", 1);
// If a stop was requested in the meantime, return without storing the Fragment.
208 if (stop_requested_)
return;
// Append the Fragment to this rank's store and notify input_cv_, on which recvFragment waits.
210 fragment_store_[source_rank].emplace_back(std::move(fragment));
211 input_cv_.notify_all();
void unsuppressAll()
Re-enable all sources.
void reject_fragment(int source_rank, FragmentPtr frag)
Place the given Fragment back in the FragmentStore (Called when the EventStore is full) ...
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
This TransferInterface is a Receiver.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
FragmentPtr recvFragment(int &rank, size_t timeout_usec=0)
Receive a Fragment.
DataReceiverManager(const fhicl::ParameterSet &ps)
DataReceiverManager Constructor.
virtual ~DataReceiverManager()
DataReceiverManager Destructor.
void suppress_source(int source)
Suppress the given source.
void start_threads()
Start receiver threads for all enabled sources.