artdaq  v3_00_03
FragmentReceiverManager.cc
1 #include <chrono>
2 
3 #include "proto/FragmentReceiverManager.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
6 #include "cetlib_except/exception.h"
7 
// FragmentReceiverManager constructor: member-initializer list and body.
// NOTE(review): the constructor's signature line is not present here; the body
// reads its configuration from a parameter named `pset` — confirm against the header.
//
// Configuration keys read from pset (with their defaults):
//   auto_suppression_enabled (bool,   true)   -> suppress_noisy_senders_
//   max_receive_difference   (size_t, 50)     -> suppression_threshold_
//   receive_timeout_usec     (size_t, 100000) -> receive_timeout_
//   enabled_sources          (vector<int>, empty)
//   sources                  (ParameterSet, empty)
9  : stop_requested_(false)
10  , source_threads_()
11  , source_plugins_()
12  , enabled_sources_()
13  , fragment_store_()
14  , recv_frag_count_()
15  , recv_frag_size_()
16  , recv_seq_count_()
17  , suppress_noisy_senders_(pset.get<bool>("auto_suppression_enabled", true))
18  , suppression_threshold_(pset.get<size_t>("max_receive_difference", 50))
19  , receive_timeout_(pset.get<size_t>("receive_timeout_usec", 100000))
20 {
21  TLOG_DEBUG("FragmentReceiverManager") << "Constructor" << TLOG_ENDL;
// An empty "enabled_sources" list means every successfully configured source is
// enabled; otherwise only the listed ranks are inserted into enabled_sources_.
22  auto enabled_srcs = pset.get<std::vector<int>>("enabled_sources", std::vector<int>());
23  auto enabled_srcs_empty = enabled_srcs.size() == 0;
24  if (enabled_srcs_empty)
25  {
26  TLOG_INFO("FragmentReceiverManager") << "enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
27  }
28  else
29  {
30  for (auto& s : enabled_srcs)
31  {
32  enabled_sources_.insert(s);
33  }
34  }
35 
// Build one transfer plugin per named entry of the "sources" parameter set.
// A failure for one source is logged as a warning and does not abort construction.
36  auto srcs = pset.get<fhicl::ParameterSet>("sources", fhicl::ParameterSet());
37  for (auto& s : srcs.get_pset_names())
38  {
39  try
40  {
// NOTE(review): the remaining argument(s) and closing of this call are not
// visible here (the statement is cut after "s,") — confirm against the original file.
41  auto transfer = std::unique_ptr<TransferInterface>(MakeTransferPlugin(srcs, s,
43  auto source_rank = transfer->source_rank();
44  if (enabled_srcs_empty) enabled_sources_.insert(source_rank);
45  source_plugins_[source_rank] = std::move(transfer);
// operator[] is used only for its side effect: it creates the (empty) store entry for this rank.
46  fragment_store_[source_rank];
47  }
// NOTE(review): both handlers below catch by value, which copies the exception and,
// for std::exception, slices derived types so what() may lose the real message.
// Catching by const reference would avoid that.
48  catch (cet::exception ex)
49  {
50  TLOG_WARNING("FragmentReceiverManager") << "cet::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
51  }
52  catch (std::exception ex)
53  {
54  TLOG_WARNING("FragmentReceiverManager") << "std::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
55  }
56  catch (...)
57  {
58  TLOG_WARNING("FragmentReceiverManager") << "Non-cet exception caught while setting up source " << s << "." << TLOG_ENDL;
59  }
60  }
// Having no configured sources is only logged as an error; construction still completes.
61  if (srcs.get_pset_names().size() == 0)
62  {
63  TLOG_ERROR("FragmentReceiverManager") << "No sources configured!" << TLOG_ENDL;
64  }
65 }
66 
// FragmentReceiverManager destructor body.
// NOTE(review): the destructor's signature line is not present here; the log
// messages below identify this block as ~FragmentReceiverManager.
//
// Shutdown sequence: set stop_requested_, wake every thread waiting on output_cv_,
// then join each joinable thread stored in source_threads_.
68 {
69  TLOG_DEBUG("FragmentReceiverManager") << "Destructor" << TLOG_ENDL;
70  TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: BEGIN: Setting stop_requested to true, frags=" << std::to_string(count()) << ", bytes=" << std::to_string(byteCount()) << TLOG_ENDL;
71  stop_requested_ = true;
72 
73  TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: Notifying all threads" << TLOG_ENDL;
// Receiver threads held back by suppression wait on output_cv_ (see runReceiver_);
// notifying lets them re-check stop_requested_ without waiting out their timeout.
74  output_cv_.notify_all();
75 
76  TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: Joining all threads" << TLOG_ENDL;
77  for (auto& s : source_threads_)
78  {
79  auto& thread = s.second;
// Only joinable threads are joined; other entries are skipped.
80  if (thread.joinable()) thread.join();
81  }
82  TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: DONE" << TLOG_ENDL;
83 }
84 
85 bool artdaq::FragmentReceiverManager::fragments_ready_() const
86 {
87  return get_next_source_() != -1;
88 }
89 
90 int artdaq::FragmentReceiverManager::get_next_source_() const
91 {
92  //std::unique_lock<std::mutex> lck(fragment_store_mutex_);
93  for (auto& it : fragment_store_)
94  {
95  if (!enabled_sources_.count(it.first)) continue;
96  if (!it.second.empty()) return it.first;
97  }
98  return -1;
99 }
100 
// Body of the routine that launches the receiver threads.
// NOTE(review): the signature line is not present here; this looks like
// FragmentReceiverManager::start_threads() — confirm against the header.
//
// For every configured plugin whose rank is in enabled_sources_, a boost::thread
// running runReceiver_(rank) is created and stored in source_threads_[rank].
102 {
103  for (auto& source : source_plugins_)
104  {
105  auto& rank = source.first;
// Plugins for ranks that are not enabled get no receiver thread.
106  if (enabled_sources_.count(rank))
107  {
108  source_threads_[rank] = boost::thread(&FragmentReceiverManager::runReceiver_, this, rank);
109  }
110  }
111 }
112 
113 artdaq::FragmentPtr artdaq::FragmentReceiverManager::recvFragment(int& rank, size_t timeout_usec)
114 {
115  TLOG_ARB(5, "FragmentReceiverManager") <<"recvFragment entered tmo=" << std::to_string(timeout_usec) << " us" << TLOG_ENDL;
116 
117  if (timeout_usec == 0) timeout_usec = 1000000;
118 
119  auto ready = fragments_ready_();
120  size_t waited = 0;
121  auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
122  TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment fragment_ready_=" << ready << " before wait" << TLOG_ENDL;
123  while (!ready && waited < timeout_usec)
124  {
125  {
126  std::unique_lock<std::mutex> lck(input_cv_mutex_);
127  input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
128  }
129  waited += wait_amount;
130  ready = fragments_ready_();
131  }
132  TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment fragment_ready_=" << ready << " after waited=" << std::to_string( waited) << TLOG_ENDL;
133  if (!ready)
134  {
135  TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment: No fragments ready, returning empty" << TLOG_ENDL;
137  return std::unique_ptr<Fragment>{};
138  }
139 
140  int current_source = get_next_source_();
141  FragmentPtr current_fragment = fragment_store_[current_source].front();
142  output_cv_.notify_all();
143  rank = current_source;
144 
145  if (current_fragment != nullptr)
146  TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment: Done rank="<< rank <<", fragment size="<<std::to_string(current_fragment->size()) << " words, seqId=" << std::to_string( current_fragment->sequenceID()) << TLOG_ENDL;
147  return std::move(current_fragment);
148 }
149 
150 void artdaq::FragmentReceiverManager::runReceiver_(int source_rank)
151 {
152  while (!stop_requested_ && enabled_sources_.count(source_rank))
153  {
154  TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_ "<< source_rank << ": Begin loop" << TLOG_ENDL;
155  auto is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
156  while (!stop_requested_ && is_suppressed)
157  {
158  TLOG_ARB(6, "FragmentReceiverManager") << "runReceiver_: Suppressing receiver rank " << source_rank << TLOG_ENDL;
159  if (!is_suppressed) input_cv_.notify_all();
160  else
161  {
162  std::unique_lock<std::mutex> lck(output_cv_mutex_);
163  output_cv_.wait_for(lck, std::chrono::seconds(1));
164  }
165  is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
166  }
167  if (stop_requested_) return;
168 
169  auto start_time = std::chrono::steady_clock::now();
170  TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_: Calling receiveFragment" << TLOG_ENDL;
171  auto fragment = std::unique_ptr<Fragment>(new Fragment());
172 #if 0
173  auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
174  TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_: Done with receiveFragment, ret=" << ret << " (should be " << source_rank << ")" << TLOG_ENDL;
175  if (ret != source_rank) continue; // Receive timeout or other oddness
176 #else
177  artdaq::detail::RawFragmentHeader hdr;
178  auto ret1 = source_plugins_[source_rank]->receiveFragmentHeader(hdr, receive_timeout_);
179  TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_: Done with receiveFragmentHeader, ret1=" << ret1 << " (should be " << source_rank << ")" << TLOG_ENDL;
180 
181  if (ret1 != source_rank) continue; // Receive timeout or other oddness
182 
183  fragment->resize(hdr.word_count - hdr.num_words());
184  memcpy(fragment->headerAddress(), &hdr, hdr.num_words() * sizeof(artdaq::RawDataType));
185  auto ret2 = source_plugins_[source_rank]->receiveFragmentData(fragment->headerAddress() + hdr.num_words(), hdr.word_count - hdr.num_words());
186  if (ret2 != ret1)
187  {
188  TLOG_ERROR("FragmentReceiverManager") << "ReceiveFragmentHeader returned " << ret1 << ", but ReceiveFragmentData returned " << ret2 << TLOG_ENDL;
189  continue;
190  }
191 #endif
192 
193 
194  if (fragment->type() == artdaq::Fragment::EndOfDataFragmentType)
195  {
196  fragment_store_[source_rank].SetEndOfData(*reinterpret_cast<size_t*>(fragment->dataBegin()));
197  }
198  else if(fragment->type() == artdaq::Fragment::DataFragmentType || fragment->type() == artdaq::Fragment::ContainerFragmentType || fragment->isUserFragmentType(fragment->type()))
199  {
200  recv_frag_count_.incSlot(source_rank);
201  recv_frag_size_.incSlot(source_rank, fragment->size() * sizeof(RawDataType));
202  recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
203  }
204  else
205  {
206  continue;
207  }
208 
209 
210 
211  if (metricMan)
212  {//&& recv_frag_count_.slotCount(source_rank) % 100 == 0) {
213  TLOG_ARB(6, "FragmentReceiverManager") << "runReceiver_: Sending receive stats" << TLOG_ENDL;
214  auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
215  metricMan->sendMetric("Data Receive Time From Rank " + std::to_string(source_rank), delta_t, "s", 1, MetricMode::Accumulate);
216  metricMan->sendMetric("Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() * sizeof(RawDataType)), "B", 1, MetricMode::Accumulate);
217  metricMan->sendMetric("Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() * sizeof(RawDataType) / delta_t, "B/s", 1, MetricMode::Average);
218  }
219 
220 
221  fragment_store_[source_rank].emplace_back(std::move(fragment));
222  input_cv_.notify_all();
223 
224  if (fragment_store_[source_rank].GetEndOfData() <= recv_frag_count_.slotCount(source_rank))
225  {
226  return;
227  }
228  }
229 }
void start_threads()
Start receiver threads for all enabled sources.
virtual ~FragmentReceiverManager()
FragmentReceiverManager Destructor.
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
This TransferInterface is a Receiver.
FragmentReceiverManager(const fhicl::ParameterSet &ps)
FragmentReceiverManager Constructor.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
FragmentPtr recvFragment(int &rank, size_t timeout_usec=0)
Receive a Fragment.