artdaq  v2_02_03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
DataReceiverManager.cc
1 #include <chrono>
2 
3 #include "artdaq/DAQrate/DataReceiverManager.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
6 
7 artdaq::DataReceiverManager::DataReceiverManager(const fhicl::ParameterSet& pset)
8  : stop_requested_(false)
9  , source_threads_()
10  , source_plugins_()
11  , enabled_sources_()
12  , suppressed_sources_()
13  , fragment_store_()
14  , recv_frag_count_()
15  , recv_frag_size_()
16  , recv_seq_count_()
17  , suppress_noisy_senders_(pset.get<bool>("auto_suppression_enabled",true))
18  , suppression_threshold_(pset.get<size_t>("max_receive_difference", 50))
19  , receive_timeout_(pset.get<size_t>("receive_timeout_usec", 100000))
20 {
21  TLOG_DEBUG("DataReceiverManager") << "Constructor" << TLOG_ENDL;
22  auto enabled_srcs = pset.get<std::vector<int>>("enabled_sources", std::vector<int>());
23  auto enabled_srcs_empty = enabled_srcs.size() == 0;
24  if (enabled_srcs_empty)
25  {
26  TLOG_INFO("DataReceiverManager") << "enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
27  }
28  else
29  {
30  for (auto& s : enabled_srcs)
31  {
32  enabled_sources_.insert(s);
33  }
34  }
35 
36  auto srcs = pset.get<fhicl::ParameterSet>("sources", fhicl::ParameterSet());
37  for (auto& s : srcs.get_pset_names())
38  {
39  try
40  {
41  auto ss = srcs.get<fhicl::ParameterSet>(s).get<int>("source_rank");
42  if (enabled_srcs_empty) enabled_sources_.insert(ss);
43  source_plugins_[ss] = std::unique_ptr<TransferInterface>(MakeTransferPlugin(srcs, s, TransferInterface::Role::kReceive));
44  fragment_store_[ss];
45  }
46  catch (cet::exception ex)
47  {
48  TLOG_WARNING("DataReceiverManager") << "cet::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
49  }
50  catch (std::exception ex)
51  {
52  TLOG_WARNING("DataReceiverManager") << "std::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
53  }
54  catch (...)
55  {
56  TLOG_WARNING("DataReceiverManager") << "Non-cet exception caught while setting up source " << s << "." << TLOG_ENDL;
57  }
58  }
59  if (srcs.get_pset_names().size() == 0)
60  {
61  TLOG_ERROR("DataReceiverManager") << "No sources configured!" << TLOG_ENDL;
62  }
63 }
64 
66 {
67  TLOG_DEBUG("DataReceiverManager") << "Destructor" << TLOG_ENDL;
68  TRACE(5, "~DataReceiverManager: BEGIN: Setting stop_requested to true, frags=%zu, bytes=%zu", count(), byteCount());
69  stop_requested_ = true;
70 
71  TRACE(5, "~DataReceiverManager: Notifying all threads");
72  output_cv_.notify_all();
73 
74  TRACE(5, "~DataReceiverManager: Joining all threads");
75  for (auto& s : source_threads_)
76  {
77  auto& thread = s.second;
78  if (thread.joinable()) thread.join();
79  }
80  TRACE(5, "~DataReceiverManager: DONE");
81 }
82 
83 bool artdaq::DataReceiverManager::fragments_ready_() const
84 {
85  return get_next_source_() != -1;
86 }
87 
88 int artdaq::DataReceiverManager::get_next_source_() const
89 {
90  //std::unique_lock<std::mutex> lck(fragment_store_mutex_);
91  for (auto& it : fragment_store_)
92  {
93  if (!enabled_sources_.count(it.first) || suppressed_sources_.count(it.first)) continue;
94  if (!it.second.empty()) return it.first;
95  }
96  return -1;
97 }
98 
100 {
101  suppressed_sources_.clear();
102  output_cv_.notify_all();
103 }
104 
106 {
107  suppressed_sources_.insert(source);
108 }
109 
110 void artdaq::DataReceiverManager::reject_fragment(int source_rank, FragmentPtr frag)
111 {
112  if (frag == nullptr) return;
113  suppress_source(source_rank);
114  fragment_store_[source_rank].emplace_front(std::move(frag));
115 }
116 
118 {
119  for (auto& source : source_plugins_)
120  {
121  auto& rank = source.first;
122  if (enabled_sources_.count(rank))
123  {
124  source_threads_[rank] = std::thread(&DataReceiverManager::runReceiver_, this, rank);
125  }
126  }
127 }
128 
129 artdaq::FragmentPtr artdaq::DataReceiverManager::recvFragment(int& rank, size_t timeout_usec)
130 {
131  TRACE(5, "DataReceiverManager::recvFragment entered tmo=%zu us", timeout_usec);
132 
133  if (timeout_usec == 0) timeout_usec = 1000000;
134 
135  auto ready = fragments_ready_();
136  size_t waited = 0;
137  auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
138  TRACE(5, "DataReceiverManager::recvFragment fragment_ready_=%d before wait", ready);
139  while (!ready && waited < timeout_usec)
140  {
141  {
142  std::unique_lock<std::mutex> lck(input_cv_mutex_);
143  input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
144  }
145  waited += wait_amount;
146  ready = fragments_ready_();
147  }
148  TRACE(5, "DataReceiverManager::recvFragment fragment_ready_=%d after waited=%zu", ready, waited);
149  if (!ready)
150  {
151  TRACE(5, "DataReceiverManager::recvFragment: No fragments ready, returning empty");
153  return std::unique_ptr<Fragment>{};
154  }
155 
156  int current_source = get_next_source_();
157  FragmentPtr current_fragment = fragment_store_[current_source].front();
158  output_cv_.notify_all();
159  rank = current_source;
160 
161  if (current_fragment != nullptr)
162  TRACE(5, "DataReceiverManager::recvFragment: Done rank=%d, fragment size=%zu words, seqId=%zu", rank, current_fragment->size(), current_fragment->sequenceID());
163  return std::move(current_fragment);
164 }
165 
166 void artdaq::DataReceiverManager::runReceiver_(int source_rank)
167 {
168  while (!stop_requested_ && enabled_sources_.count(source_rank))
169  {
170  TRACE(16, "DataReceiverManager::runReceiver_: Begin loop");
171  auto is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
172  while (!stop_requested_ && is_suppressed)
173  {
174  TRACE(6, "DataReceiverManager::runReceiver_: Suppressing receiver rank %d", source_rank);
175  if (!is_suppressed) input_cv_.notify_all();
176  else
177  {
178  std::unique_lock<std::mutex> lck(output_cv_mutex_);
179  output_cv_.wait_for(lck, std::chrono::seconds(1));
180  }
181  is_suppressed = (suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount()) || suppressed_sources_.count(source_rank) > 0;
182  }
183  if (stop_requested_) return;
184 
185  auto start_time = std::chrono::steady_clock::now();
186  TRACE(16, "DataReceiverManager::runReceiver_: Calling receiveFragment");
187  auto fragment = std::unique_ptr<Fragment>(new Fragment());
188  auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
189  TRACE(16, "DataReceiverManager::runReceiver_: Done with receiveFragment, ret=%d (should be %d)", ret, source_rank);
190 
191  if (ret != source_rank) continue; // Receive timeout or other oddness
192 
193  recv_frag_count_.incSlot(source_rank);
194  recv_frag_size_.incSlot(source_rank, fragment->size() * sizeof(RawDataType));
195  recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
196 
197  bool endOfData = fragment->type() == artdaq::Fragment::EndOfDataFragmentType;
198 
199  if (metricMan)
200  {//&& recv_frag_count_.slotCount(source_rank) % 100 == 0) {
201  TRACE(6, "DataReceiverManager::runReceiver_: Sending receive stats");
202  auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
203  metricMan->sendMetric("Data Receive Time From Rank " + std::to_string(source_rank), delta_t, "s", 1);
204  metricMan->sendMetric("Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() * sizeof(RawDataType)), "B", 1);
205  metricMan->sendMetric("Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() * sizeof(RawDataType) / delta_t, "B/s", 1);
206  }
207 
208  if (stop_requested_) return;
209 
210  fragment_store_[source_rank].emplace_back(std::move(fragment));
211  input_cv_.notify_all();
212 
213  if (endOfData)
214  {
215  return;
216  }
217  }
218 }
void unsuppressAll()
Re-enable all sources.
void reject_fragment(int source_rank, FragmentPtr frag)
Place the given Fragment back in the FragmentStore (Called when the EventStore is full) ...
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
This TransferInterface is a Receiver.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
FragmentPtr recvFragment(int &rank, size_t timeout_usec=0)
Receive a Fragment.
DataReceiverManager(const fhicl::ParameterSet &ps)
DataReceiverManager Constructor.
virtual ~DataReceiverManager()
DataReceiverManager Destructor.
void suppress_source(int source)
Suppress the given source.
void start_threads()
Start receiver threads for all enabled sources.