artdaq  v3_01_00
FragmentReceiverManager.cc
1 #define TRACE_NAME "FragmentReceiverManager"
2 
3 #include <chrono>
4 
5 #include "artdaq/DAQrate/FragmentReceiverManager.hh"
6 #include "artdaq/DAQdata/Globals.hh"
7 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
8 #include "cetlib_except/exception.h"
9 
11  : stop_requested_(false)
12  , source_threads_()
13  , source_plugins_()
14  , enabled_sources_()
15  , fragment_store_()
16  , recv_frag_count_()
17  , recv_frag_size_()
18  , recv_seq_count_()
19  , suppress_noisy_senders_(pset.get<bool>("auto_suppression_enabled", true))
20  , suppression_threshold_(pset.get<size_t>("max_receive_difference", 50))
21  , receive_timeout_(pset.get<size_t>("receive_timeout_usec", 100000))
22  , last_source_(-1)
23 {
24  TLOG(TLVL_DEBUG) << "Constructor" ;
25  auto enabled_srcs = pset.get<std::vector<int>>("enabled_sources", std::vector<int>());
26  auto enabled_srcs_empty = enabled_srcs.size() == 0;
27  if (enabled_srcs_empty)
28  {
29  TLOG(TLVL_INFO) << "enabled_sources not specified, assuming all sources enabled." ;
30  }
31  else
32  {
33  for (auto& s : enabled_srcs)
34  {
35  enabled_sources_.insert(s);
36  }
37  }
38 
39  auto srcs = pset.get<fhicl::ParameterSet>("sources", fhicl::ParameterSet());
40  for (auto& s : srcs.get_pset_names())
41  {
42  try
43  {
44  auto transfer = std::unique_ptr<TransferInterface>(MakeTransferPlugin(srcs, s,
46  auto source_rank = transfer->source_rank();
47  if (enabled_srcs_empty) enabled_sources_.insert(source_rank);
48  source_plugins_[source_rank] = std::move(transfer);
49  fragment_store_[source_rank];
50  }
51  catch (cet::exception ex)
52  {
53  TLOG(TLVL_WARNING) << "cet::exception caught while setting up source " << s << ": " << ex.what() ;
54  }
55  catch (std::exception ex)
56  {
57  TLOG(TLVL_WARNING) << "std::exception caught while setting up source " << s << ": " << ex.what() ;
58  }
59  catch (...)
60  {
61  TLOG(TLVL_WARNING) << "Non-cet exception caught while setting up source " << s << "." ;
62  }
63  }
64  if (srcs.get_pset_names().size() == 0)
65  {
66  TLOG(TLVL_ERROR) << "No sources configured!" ;
67  }
68 }
69 
71 {
72  TLOG(TLVL_DEBUG) << "Destructor" ;
73  TLOG(5) << "~FragmentReceiverManager: BEGIN: Setting stop_requested to true, frags=" << std::to_string(count()) << ", bytes=" << std::to_string(byteCount()) ;
74  stop_requested_ = true;
75 
76  TLOG(5) << "~FragmentReceiverManager: Notifying all threads" ;
77  output_cv_.notify_all();
78 
79  TLOG(5) << "~FragmentReceiverManager: Joining all threads" ;
80  for (auto& s : source_threads_)
81  {
82  auto& thread = s.second;
83  if (thread.joinable()) thread.join();
84  }
85  TLOG(5) << "~FragmentReceiverManager: DONE" ;
86 }
87 
88 bool artdaq::FragmentReceiverManager::fragments_ready_() const
89 {
90  for (auto& it : fragment_store_)
91  {
92  if (!enabled_sources_.count(it.first)) continue;
93  if (!it.second.empty()) { return true; }
94  }
95  return false;
96 }
97 
98 int artdaq::FragmentReceiverManager::get_next_source_() const
99 {
100  //std::unique_lock<std::mutex> lck(fragment_store_mutex_);
101  std::set<int> ready_sources;
102  for (auto& it : fragment_store_)
103  {
104  if (!enabled_sources_.count(it.first)) continue;
105  if (!it.second.empty()) {
106  ready_sources.insert(it.first);
107  }
108  }
109 
110  if (ready_sources.size()) {
111  auto iter = ready_sources.find(last_source_);
112  if (iter == ready_sources.end() || ++iter == ready_sources.end()) {
113  TLOG(TLVL_DEBUG) << "get_next_source returning " << *ready_sources.begin();
114  last_source_ = *ready_sources.begin();
115  return *ready_sources.begin();
116  }
117 
118  TLOG(TLVL_DEBUG) << "get_next_source returning " << *iter;
119  last_source_ = *iter;
120  return *iter;
121  }
122 
123  TLOG(TLVL_DEBUG) << "get_next_source returning -1";
124  return -1;
125 }
126 
128 {
129  for (auto& source : source_plugins_)
130  {
131  auto& rank = source.first;
132  if (enabled_sources_.count(rank))
133  {
134  source_threads_[rank] = boost::thread(&FragmentReceiverManager::runReceiver_, this, rank);
135  }
136  }
137 }
138 
139 artdaq::FragmentPtr artdaq::FragmentReceiverManager::recvFragment(int& rank, size_t timeout_usec)
140 {
141  TLOG(5) <<"recvFragment entered tmo=" << std::to_string(timeout_usec) << " us" ;
142 
143  if (timeout_usec == 0) timeout_usec = 1000000;
144 
145  auto ready = fragments_ready_();
146  size_t waited = 0;
147  auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
148  TLOG(5) << "recvFragment fragment_ready_=" << ready << " before wait" ;
149  while (!ready && waited < timeout_usec)
150  {
151  {
152  std::unique_lock<std::mutex> lck(input_cv_mutex_);
153  input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
154  }
155  waited += wait_amount;
156  ready = fragments_ready_();
157  if (running_sources_.size() == 0) break;
158  }
159  TLOG(5) << "recvFragment fragment_ready_=" << ready << " after waited=" << std::to_string( waited) ;
160  if (!ready)
161  {
162  TLOG(5) << "recvFragment: No fragments ready, returning empty" ;
164  return std::unique_ptr<Fragment>{};
165  }
166 
167  int current_source = get_next_source_();
168  FragmentPtr current_fragment = fragment_store_[current_source].front();
169  output_cv_.notify_all();
170  rank = current_source;
171 
172  if (current_fragment != nullptr)
173  TLOG(5) << "recvFragment: Done rank="<< rank <<", fragment size="<<std::to_string(current_fragment->size()) << " words, seqId=" << std::to_string( current_fragment->sequenceID()) ;
174  return current_fragment;
175 }
176 
177 void artdaq::FragmentReceiverManager::runReceiver_(int source_rank)
178 {
179  running_sources_.insert(source_rank);
180  auto eod_quiet_start = std::chrono::steady_clock::now();
181  while (!stop_requested_ && enabled_sources_.count(source_rank))
182  {
183  TLOG(16) << "runReceiver_ "<< source_rank << ": Begin loop" ;
184  auto is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
185  while (!stop_requested_ && is_suppressed)
186  {
187  TLOG(6) << "runReceiver_: Suppressing receiver rank " << source_rank ;
188  if (!is_suppressed) input_cv_.notify_all();
189  else
190  {
191  std::unique_lock<std::mutex> lck(output_cv_mutex_);
192  output_cv_.wait_for(lck, std::chrono::seconds(1));
193  }
194  is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
195  }
196  if (stop_requested_)
197  {
198  running_sources_.erase(source_rank);
199  return;
200  }
201 
202  if (fragment_store_[source_rank].GetEndOfData() <= recv_frag_count_.slotCount(source_rank) && TimeUtils::GetElapsedTimeMilliseconds(eod_quiet_start) > 1000)
203  {
204  TLOG(TLVL_DEBUG) << "runReceiver_: EndOfData conditions satisfied, ending receive loop";
205  running_sources_.erase(source_rank);
206  return;
207  }
208 
209  auto start_time = std::chrono::steady_clock::now();
210  TLOG(16) << "runReceiver_: Calling receiveFragment" ;
211  auto fragment = std::unique_ptr<Fragment>(new Fragment());
212 #if 0
213  auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
214  TLOG(16) << "runReceiver_: Done with receiveFragment, ret=" << ret << " (should be " << source_rank << ")" ;
215  if (ret != source_rank) continue; // Receive timeout or other oddness
216 #else
217  artdaq::detail::RawFragmentHeader hdr;
218  auto ret1 = source_plugins_[source_rank]->receiveFragmentHeader(hdr, receive_timeout_);
219  TLOG(16) << "runReceiver_: Done with receiveFragmentHeader, ret1=" << ret1 << " (should be " << source_rank << ")" ;
220 
221  if (ret1 != source_rank) continue; // Receive timeout or other oddness
222  eod_quiet_start = std::chrono::steady_clock::now();
223 
224  fragment->resize(hdr.word_count - hdr.num_words());
225  memcpy(fragment->headerAddress(), &hdr, hdr.num_words() * sizeof(artdaq::RawDataType));
226  auto ret2 = source_plugins_[source_rank]->receiveFragmentData(fragment->headerAddress() + hdr.num_words(), hdr.word_count - hdr.num_words());
227  if (ret2 != ret1)
228  {
229  TLOG(TLVL_ERROR) << "ReceiveFragmentHeader returned " << ret1 << ", but ReceiveFragmentData returned " << ret2 ;
230  continue;
231  }
232 #endif
233 
234 
235  if (fragment->type() == artdaq::Fragment::EndOfDataFragmentType)
236  {
237  TLOG(TLVL_TRACE) << "runReceiver_: EndOfData Fragment received!";
238  fragment_store_[source_rank].SetEndOfData(*reinterpret_cast<size_t*>(fragment->dataBegin()));
239  }
240  else if(fragment->type() == artdaq::Fragment::DataFragmentType || fragment->type() == artdaq::Fragment::ContainerFragmentType || fragment->isUserFragmentType(fragment->type()))
241  {
242  TLOG(TLVL_TRACE) << "runReceiver_: Data Fragment received!";
243  recv_frag_count_.incSlot(source_rank);
244  recv_frag_size_.incSlot(source_rank, fragment->size() * sizeof(RawDataType));
245  recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
246  }
247  else
248  {
249  continue;
250  }
251 
252 
253 
254  if (metricMan)
255  {//&& recv_frag_count_.slotCount(source_rank) % 100 == 0) {
256  TLOG(6) << "runReceiver_: Sending receive stats" ;
257  auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
258  metricMan->sendMetric("Data Receive Time From Rank " + std::to_string(source_rank), delta_t, "s", 1, MetricMode::Accumulate);
259  metricMan->sendMetric("Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() * sizeof(RawDataType)), "B", 1, MetricMode::Accumulate);
260  metricMan->sendMetric("Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() * sizeof(RawDataType) / delta_t, "B/s", 1, MetricMode::Average);
261  }
262 
263 
264  fragment_store_[source_rank].emplace_back(std::move(fragment));
265  TLOG(TLVL_TRACE) << "runReceiver_: There are now " << fragment_store_[source_rank].size() << " Fragments stored from this source";
266  input_cv_.notify_all();
267 
268  }
269 
270  running_sources_.erase(source_rank);
271 }
void start_threads()
Start receiver threads for all enabled sources.
Value to be returned upon receive timeout.
virtual ~FragmentReceiverManager()
FragmentReceiverManager Destructor.
This TransferInterface is a Receiver.
FragmentReceiverManager(const fhicl::ParameterSet &ps)
FragmentReceiverManager Constructor.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
FragmentPtr recvFragment(int &rank, size_t timeout_usec=0)
Receive a Fragment.