artdaq  v3_03_00
DataSenderManager.cc
1 #define TRACE_NAME (app_name + "_DataSenderManager").c_str()
2 #include "artdaq/DAQdata/Globals.hh"
3 #include "artdaq/DAQrate/DataSenderManager.hh"
4 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
5 #include "artdaq/TransferPlugins/detail/HostMap.hh"
6 
7 #include <chrono>
8 #include "canvas/Utilities/Exception.h"
9 #include <arpa/inet.h>
10 #include <netinet/in.h>
11 #include <sys/types.h>
12 #include <poll.h>
13 #include <sys/socket.h>
15 
16 artdaq::DataSenderManager::DataSenderManager(const fhicl::ParameterSet& pset)
17  : destinations_()
18  , destination_metric_data_()
19  , destination_metric_send_time_()
20  , enabled_destinations_()
21  , sent_frag_count_()
22  , broadcast_sends_(pset.get<bool>("broadcast_sends", false))
23  , non_blocking_mode_(pset.get<bool>("nonblocking_sends", false))
24  , send_timeout_us_(pset.get<size_t>("send_timeout_usec", 5000000))
25  , send_retry_count_(pset.get<size_t>("send_retry_count", 2))
26  , routing_master_mode_(detail::RoutingMasterMode::INVALID)
27  , should_stop_(false)
28  , ack_socket_(-1)
29  , table_socket_(-1)
30  , routing_table_last_(0)
31  , routing_table_max_size_(pset.get<size_t>("routing_table_max_size", 1000))
32  , highest_sequence_id_routed_(0)
33 {
34  TLOG(TLVL_DEBUG) << "Received pset: " << pset.to_string();
35 
36  // Validate parameters
37  if (send_timeout_us_ == 0) send_timeout_us_ = std::numeric_limits<size_t>::max();
38 
39  auto rmConfig = pset.get<fhicl::ParameterSet>("routing_table_config", fhicl::ParameterSet());
40  use_routing_master_ = rmConfig.get<bool>("use_routing_master", false);
41  table_port_ = rmConfig.get<int>("table_update_port", 35556);
42  table_address_ = rmConfig.get<std::string>("table_update_address", "227.128.12.28");
43  ack_port_ = rmConfig.get<int>("table_acknowledge_port", 35557);
44  ack_address_ = rmConfig.get<std::string>("routing_master_hostname", "localhost");
45  routing_timeout_ms_ = (rmConfig.get<int>("routing_timeout_ms", 1000));
46  routing_retry_count_ = rmConfig.get<int>("routing_retry_count", 5);
47 
48  hostMap_t host_map = MakeHostMap(pset);
49  size_t tcp_send_buffer_size = pset.get<size_t>("tcp_send_buffer_size", 0);
50  size_t max_fragment_size_words = pset.get<size_t>("max_fragment_size_words", 0);
51 
52  auto dests = pset.get<fhicl::ParameterSet>("destinations", fhicl::ParameterSet());
53  for (auto& d : dests.get_pset_names())
54  {
55  auto dest_pset = dests.get<fhicl::ParameterSet>(d);
56  host_map = MakeHostMap(dest_pset, host_map);
57  }
58  auto host_map_pset = MakeHostMapPset(host_map);
59  fhicl::ParameterSet dests_mod;
60  for (auto& d : dests.get_pset_names())
61  {
62  auto dest_pset = dests.get<fhicl::ParameterSet>(d);
63  dest_pset.erase("host_map");
64  dest_pset.put<std::vector<fhicl::ParameterSet>>("host_map", host_map_pset);
65 
66  if (tcp_send_buffer_size != 0 && !dest_pset.has_key("tcp_send_buffer_size"))
67  {
68  dest_pset.put<size_t>("tcp_send_buffer_size", tcp_send_buffer_size);
69  }
70  if (max_fragment_size_words != 0 && !dest_pset.has_key("max_fragment_size_words"))
71  {
72  dest_pset.put<size_t>("max_fragment_size_words", max_fragment_size_words);
73  }
74 
75  dests_mod.put<fhicl::ParameterSet>(d, dest_pset);
76  }
77 
78  for (auto& d : dests_mod.get_pset_names())
79  {
80  try
81  {
82  auto transfer = MakeTransferPlugin(dests_mod, d, TransferInterface::Role::kSend);
83  auto destination_rank = transfer->destination_rank();
84  destinations_.emplace(destination_rank, std::move(transfer));
85  destination_metric_data_[destination_rank] = std::pair<size_t, double>();
86  destination_metric_send_time_[destination_rank] = std::chrono::steady_clock::now();
87  }
88  catch (std::invalid_argument)
89  {
90  TLOG(TLVL_DEBUG) << "Invalid destination specification: " << d;
91  }
92  catch (cet::exception ex)
93  {
94  TLOG(TLVL_WARNING) << "Caught cet::exception: " << ex.what();
95  }
96  catch (...)
97  {
98  TLOG(TLVL_WARNING) << "Non-cet exception while setting up TransferPlugin: " << d << ".";
99  }
100  }
101  if (destinations_.size() == 0)
102  {
103  TLOG(TLVL_ERROR) << "No destinations specified!";
104  }
105  else
106  {
107  auto enabled_dests = pset.get<std::vector<size_t>>("enabled_destinations", std::vector<size_t>());
108  if (enabled_dests.size() == 0)
109  {
110  TLOG(TLVL_INFO) << "enabled_destinations not specified, assuming all destinations enabled.";
111  for (auto& d : destinations_)
112  {
113  enabled_destinations_.insert(d.first);
114  }
115  }
116  else
117  {
118  for (auto& d : enabled_dests)
119  {
120  enabled_destinations_.insert(d);
121  }
122  }
123  }
124  if (use_routing_master_) startTableReceiverThread_();
125 }
126 
128 {
129  TLOG(TLVL_DEBUG) << "Shutting down DataSenderManager BEGIN";
130  should_stop_ = true;
131  for (auto& dest : enabled_destinations_)
132  {
133  if (destinations_.count(dest))
134  {
135  auto sts = destinations_[dest]->moveFragment(std::move(*Fragment::eodFrag(sent_frag_count_.slotCount(dest))));
136  if (sts != TransferInterface::CopyStatus::kSuccess) TLOG(TLVL_ERROR) << "Error sending EOD Fragment to sender rank " << dest;
137  // sendFragTo(std::move(*Fragment::eodFrag(nFragments)), dest, true);
138  }
139  }
140  if (routing_thread_.joinable()) routing_thread_.join();
141  TLOG(TLVL_DEBUG) << "Shutting down DataSenderManager END. Sent " << count() << " fragments.";
142 }
143 
144 
145 void artdaq::DataSenderManager::setupTableListener_()
146 {
147  int sts;
148  table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
149  if (table_socket_ < 0)
150  {
151  TLOG(TLVL_ERROR) << "Error creating socket for receiving table updates!";
152  exit(1);
153  }
154 
155  struct sockaddr_in si_me_request;
156 
157  int yes = 1;
158  if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
159  {
160  TLOG(TLVL_ERROR) << " Unable to enable port reuse on request socket";
161  exit(1);
162  }
163  memset(&si_me_request, 0, sizeof(si_me_request));
164  si_me_request.sin_family = AF_INET;
165  si_me_request.sin_port = htons(table_port_);
166  //si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
167  struct in_addr in_addr_s;
168  sts = inet_aton(table_address_.c_str(), &in_addr_s );
169  if (sts == 0)
170  {
171  TLOG(TLVL_ERROR) << "inet_aton says table_address " << table_address_ << " is invalid";
172  }
173  si_me_request.sin_addr.s_addr = in_addr_s.s_addr;
174  if (bind(table_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
175  {
176  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << table_port_;
177  exit(1);
178  }
179 
180  struct ip_mreq mreq;
181  sts = ResolveHost(table_address_.c_str(), mreq.imr_multiaddr);
182  if (sts == -1)
183  {
184  TLOG(TLVL_ERROR) << "Unable to resolve multicast address for table updates";
185  exit(1);
186  }
187  mreq.imr_interface.s_addr = htonl(INADDR_ANY);
188  if (setsockopt(table_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
189  {
190  TLOG(TLVL_ERROR) << "Unable to join multicast group";
191  exit(1);
192  }
193 }
194 void artdaq::DataSenderManager::startTableReceiverThread_()
195 {
196  if (routing_thread_.joinable()) routing_thread_.join();
197  TLOG(TLVL_INFO) << "Starting Routing Thread";
198  try {
199  routing_thread_ = boost::thread(&DataSenderManager::receiveTableUpdatesLoop_, this);
200  }
201  catch (const boost::exception& e)
202  {
203  TLOG(TLVL_ERROR) << "Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
204  std::cerr << "Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
205  exit(5);
206  }
207 }
208 void artdaq::DataSenderManager::receiveTableUpdatesLoop_()
209 {
210  while (true)
211  {
212  if (should_stop_)
213  {
214  TLOG(TLVL_DEBUG) << __func__ << ": should_stop is " << std::boolalpha << should_stop_ << ", stopping";
215  return;
216  }
217 
218  TLOG(TLVL_TRACE) << __func__ << ": Polling table socket for new routes";
219  if (table_socket_ == -1)
220  {
221  TLOG(TLVL_DEBUG) << __func__ << ": Opening table listener socket";
222  setupTableListener_();
223  }
224  if (table_socket_ == -1)
225  {
226  TLOG(TLVL_DEBUG) << __func__ << ": The listen socket was not opened successfully.";
227  return;
228  }
229  if (ack_socket_ == -1)
230  {
231  ack_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
232  auto sts = ResolveHost(ack_address_.c_str(), ack_port_, ack_addr_);
233  if (sts == -1)
234  {
235  TLOG(TLVL_ERROR) << __func__ << ": Unable to resolve routing_master_address";
236  exit(1);
237  }
238  TLOG(TLVL_DEBUG) << __func__ << ": Ack socket is fd " << ack_socket_;
239  }
240 
241  struct pollfd fd;
242  fd.fd = table_socket_;
243  fd.events = POLLIN | POLLPRI;
244 
245  auto res = poll(&fd, 1, 1000);
246  if (res > 0)
247  {
248  auto first = artdaq::Fragment::InvalidSequenceID;
249  auto last = artdaq::Fragment::InvalidSequenceID;
251 
252  TLOG(TLVL_DEBUG) << __func__ << ": Going to receive RoutingPacketHeader";
253  struct sockaddr_in from;
254  socklen_t len=sizeof(from);
255  auto stss = recvfrom(table_socket_, &hdr, sizeof(artdaq::detail::RoutingPacketHeader), 0, (struct sockaddr*)&from, &len );
256  TLOG(TLVL_DEBUG) << __func__ << ": Received " << stss << " hdr bytes. (sizeof(RoutingPacketHeader) == " << sizeof(detail::RoutingPacketHeader)
257  << " from " << inet_ntoa(from.sin_addr) << ":" << from.sin_port;
258 
259  TRACE(TLVL_DEBUG,"receiveTableUpdatesLoop_: Checking for valid header with nEntries=%lu headerData:0x%016lx%016lx",hdr.nEntries,((unsigned long*)&hdr)[0],((unsigned long*)&hdr)[1]);
260  if (hdr.header != ROUTING_MAGIC)
261  {
262  TLOG(TLVL_TRACE) << __func__ << ": non-RoutingPacket received. No ROUTING_MAGIC. size(bytes)="<<stss;
263  }
264  else if (stss != sizeof(artdaq::detail::RoutingPacketHeader))
265  {
266  TLOG(TLVL_TRACE) << __func__ << ": non-RoutingPacket received. size(bytes)="<<stss;
267  }
268  else
269  {
270  if (routing_master_mode_ != detail::RoutingMasterMode::INVALID && routing_master_mode_ != hdr.mode)
271  {
272  TLOG(TLVL_ERROR) << __func__ << ": Received table has different RoutingMasterMode than expected!";
273  exit(1);
274  }
275  routing_master_mode_ = hdr.mode;
276 
278  TLOG(TLVL_DEBUG) << __func__ << ": Receiving data buffer";
279  auto sts = recvfrom(table_socket_, &buffer[0], sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries, 0, (struct sockaddr*)&from, &len );
280  assert(static_cast<size_t>(sts) == sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries);
281  TLOG(TLVL_DEBUG) << __func__ << ": Received " << sts << " pkt bytes from " << inet_ntoa(from.sin_addr) << ":" << from.sin_port;
282  TRACE(6,"receiveTableUpdatesLoop_: Received a packet of %ld bytes. 1st 16 bytes: 0x%016lx%016lx",sts,((unsigned long*)&buffer[0])[0],((unsigned long*)&buffer[0])[1]);
283 
284  first = buffer[0].sequence_id;
285  last = buffer[buffer.size() - 1].sequence_id;
286 
287  if (first + hdr.nEntries - 1 != last)
288  {
289  TLOG(TLVL_ERROR) << __func__ << ": Skipping this RoutingPacket because the first (" << first << ") and last (" << last << ") entries are inconsistent (sz=" << hdr.nEntries << ")!";
290  continue;
291  }
292  auto thisSeqID = first;
293 
294  {
295  std::unique_lock<std::mutex> lck(routing_mutex_);
296  if (routing_table_.count(last) == 0)
297  {
298  for (auto entry : buffer)
299  {
300  if (thisSeqID != entry.sequence_id)
301  {
302  TLOG(TLVL_ERROR) << __func__ << ": Aborting processing of this RoutingPacket because I encountered an inconsistent entry (seqid=" << entry.sequence_id << ", expected=" << thisSeqID << ")!";
303  last = thisSeqID - 1;
304  break;
305  }
306  thisSeqID++;
307  if (routing_table_.count(entry.sequence_id))
308  {
309  if (routing_table_[entry.sequence_id] != entry.destination_rank)
310  {
311  TLOG(TLVL_ERROR) << __func__ << ": Detected routing table corruption! Recevied update specifying that sequence ID " << entry.sequence_id
312  << " should go to rank " << entry.destination_rank << ", but I had already been told to send it to " << routing_table_[entry.sequence_id] << "!"
313  << " I will use the original value!";
314  }
315  continue;
316  }
317  if (entry.sequence_id < routing_table_last_) continue;
318  routing_table_[entry.sequence_id] = entry.destination_rank;
319  TLOG(TLVL_DEBUG) << __func__ << ": (my_rank=" << my_rank << ") received update: SeqID " << entry.sequence_id
320  << " -> Rank " << entry.destination_rank;
321  }
322  }
323 
324  TLOG(TLVL_DEBUG) << __func__ << ": There are now " << routing_table_.size() << " entries in the Routing Table";
325  if (routing_table_.size() > 0) TLOG(TLVL_DEBUG) << __func__ << ": Last routing table entry is seqID=" << routing_table_.rbegin()->first;
326 
327  auto counter = 0;
328  for (auto& entry : routing_table_)
329  {
330  TLOG(45) << "Routing Table Entry" << counter << ": " << entry.first << " -> " << entry.second;
331  counter++;
332  }
333  }
334 
336  ack.rank = my_rank;
337  ack.first_sequence_id = first;
338  ack.last_sequence_id = last;
339 
340  if (last > routing_table_last_) routing_table_last_ = last;
341 
342  TLOG(TLVL_DEBUG) << __func__ << ": Sending RoutingAckPacket with first= " << first << " and last= " << last << " to " << ack_address_ << ", port " << ack_port_ << " (my_rank = " << my_rank << ")";
343  sendto(ack_socket_, &ack, sizeof(artdaq::detail::RoutingAckPacket), 0, (struct sockaddr *)&ack_addr_, sizeof(ack_addr_));
344  }
345  }
346  }
347 }
348 
350 {
351  std::unique_lock<std::mutex> lck(routing_mutex_);
352  return routing_table_.size();
353 }
354 
356 {
357  std::unique_lock<std::mutex> lck(routing_mutex_);
358  // Find the distance from the next highest sequence ID to the end of the list
359  size_t dist = std::distance(routing_table_.upper_bound(highest_sequence_id_routed_), routing_table_.end());
360  return dist; // If dist == 1, there is one entry left.
361 }
362 
363 int artdaq::DataSenderManager::calcDest_(Fragment::sequence_id_t sequence_id) const
364 {
365  if (enabled_destinations_.size() == 0) return TransferInterface::RECV_TIMEOUT; // No destinations configured.
366  if (!use_routing_master_ && enabled_destinations_.size() == 1) return *enabled_destinations_.begin(); // Trivial case
367 
368  if (use_routing_master_)
369  {
370  auto start = std::chrono::steady_clock::now();
371  TLOG(15) << "calcDest_ use_routing_master check for routing info for seqID="<<sequence_id<<" routing_timeout_ms="<<routing_timeout_ms_<<" should_stop_="<<should_stop_;
372  while (!should_stop_ && (routing_timeout_ms_ <= 0 || TimeUtils::GetElapsedTimeMilliseconds(start) < static_cast<size_t>(routing_timeout_ms_)))
373  {
374  {
375  std::unique_lock<std::mutex> lck(routing_mutex_);
376  if (routing_master_mode_ == detail::RoutingMasterMode::RouteBySequenceID && routing_table_.count(sequence_id))
377  {
378  if (sequence_id > highest_sequence_id_routed_) highest_sequence_id_routed_ = sequence_id;
379  routing_wait_time_.fetch_add(TimeUtils::GetElapsedTimeMicroseconds(start));
380  return routing_table_.at(sequence_id);
381  }
382  else if (routing_master_mode_ == detail::RoutingMasterMode::RouteBySendCount && routing_table_.count(sent_frag_count_.count() + 1))
383  {
384  if (sent_frag_count_.count() + 1 > highest_sequence_id_routed_) highest_sequence_id_routed_ = sent_frag_count_.count() + 1;
385  routing_wait_time_.fetch_add(TimeUtils::GetElapsedTimeMicroseconds(start));
386  return routing_table_.at(sent_frag_count_.count() + 1);
387  }
388  }
389  usleep(routing_timeout_ms_ * 10);
390  }
391  routing_wait_time_.fetch_add(TimeUtils::GetElapsedTimeMicroseconds(start));
392  if (routing_master_mode_ == detail::RoutingMasterMode::RouteBySequenceID)
393  {
394  TLOG(TLVL_WARNING) << "Bad Omen: I don't have routing information for seqID " << sequence_id
395  << " and the Routing Master did not send a table update in routing_timeout_ms window (" << routing_timeout_ms_ << " ms)!";
396  }
397  else
398  {
399  TLOG(TLVL_WARNING) << "Bad Omen: I don't have routing information for send number " << sent_frag_count_.count()
400  << " and the Routing Master did not send a table update in routing_timeout_ms window (" << routing_timeout_ms_ << " ms)!";
401  }
402  }
403  else
404  {
405  auto index = sequence_id % enabled_destinations_.size();
406  auto it = enabled_destinations_.begin();
407  for (; index > 0; --index)
408  {
409  ++it;
410  if (it == enabled_destinations_.end()) it = enabled_destinations_.begin();
411  }
412  return *it;
413  }
415 }
416 
417 std::pair<int, artdaq::TransferInterface::CopyStatus> artdaq::DataSenderManager::sendFragment(Fragment&& frag)
418 {
419  // Precondition: Fragment must be complete and consistent (including
420  // header information).
421  auto start_time = std::chrono::steady_clock::now();
422  if (frag.type() == Fragment::EndOfDataFragmentType)
423  {
424  throw cet::exception("LogicError")
425  << "EOD fragments should not be sent on as received: "
426  << "use sendEODFrag() instead.";
427  }
428  size_t seqID = frag.sequenceID();
429  size_t fragSize = frag.sizeBytes();
430  TLOG(13) << "sendFragment start frag.fragmentHeader()=" << std::hex << (void*)(frag.headerBeginBytes()) << ", szB=" << std::dec << fragSize
431  << ", seqID=" << seqID << ", type=" << frag.typeString();
434  if (broadcast_sends_ || frag.type() == Fragment::EndOfRunFragmentType || frag.type() == Fragment::EndOfSubrunFragmentType || frag.type() == Fragment::InitFragmentType)
435  {
436  for (auto& bdest : enabled_destinations_)
437  {
438  TLOG(TLVL_TRACE) << "sendFragment: Sending fragment with seqId " << seqID << " to destination " << bdest << " (broadcast)";
439  // Gross, we have to copy.
440  Fragment fragCopy(frag);
441  auto sts = destinations_[bdest]->copyFragment(fragCopy, send_timeout_us_);
442  size_t retries = 0; // Tried once, so retries < send_retry_count_ will have it retry send_retry_count_ times
443  while (sts == TransferInterface::CopyStatus::kTimeout && retries < send_retry_count_)
444  {
445  sts = destinations_[bdest]->copyFragment(fragCopy, send_timeout_us_);
446  retries++;
447  }
448  if (sts != TransferInterface::CopyStatus::kSuccess) outsts = sts;
449  sent_frag_count_.incSlot(bdest);
450  }
451  }
452  else if (non_blocking_mode_)
453  {
454  auto count = routing_retry_count_;
455  while (dest == TransferInterface::RECV_TIMEOUT && count > 0)
456  {
457  dest = calcDest_(seqID);
459  {
460  count--;
461  TLOG(TLVL_WARNING) << "Could not get destination for seqID " << seqID << (count > 0 ? ", retrying." : ".");
462  }
463  }
464  if (dest != TransferInterface::RECV_TIMEOUT && destinations_.count(dest) && enabled_destinations_.count(dest))
465  {
466  TLOG(TLVL_TRACE) << "sendFragment: Sending fragment with seqId " << seqID << " to destination " << dest;
468  auto lastWarnTime = std::chrono::steady_clock::now();
469  size_t retries = 0; // Have NOT yet tried, so retries <= send_retry_count_ will have it RETRY send_retry_count_ times
470  while (sts != TransferInterface::CopyStatus::kSuccess && retries <= send_retry_count_)
471  {
472  sts = destinations_[dest]->copyFragment(frag, send_timeout_us_);
473  if (sts != TransferInterface::CopyStatus::kSuccess && TimeUtils::GetElapsedTime(lastWarnTime) >= 1)
474  {
475  TLOG(TLVL_WARNING) << "sendFragment: Sending fragment " << seqID << " to destination " << dest << " failed! Retrying...";
476  lastWarnTime = std::chrono::steady_clock::now();
477  }
478  }
479  if (sts != TransferInterface::CopyStatus::kSuccess) outsts = sts;
480  //sendFragTo(std::move(frag), dest);
481  sent_frag_count_.incSlot(dest);
482  }
483  else if (!should_stop_)
484  TLOG(TLVL_ERROR) << "(in non_blocking) calcDest returned invalid destination rank " << dest << "! This event has been lost: " << seqID
485  << ". enabled_destinantions_.size()="<<enabled_destinations_.size();
486  }
487  else
488  {
489  auto start = std::chrono::steady_clock::now();
490  while (!should_stop_ && dest == TransferInterface::RECV_TIMEOUT)
491  {
492  dest = calcDest_(seqID);
494  {
495  TLOG(TLVL_WARNING) << "Could not get destination for seqID " << seqID << ", send number " << sent_frag_count_.count() << ", retrying. Waited " << TimeUtils::GetElapsedTime(start) << " s for routing information.";
496  usleep(10000);
497  }
498  }
499  if (dest != TransferInterface::RECV_TIMEOUT && destinations_.count(dest) && enabled_destinations_.count(dest))
500  {
501  TLOG(5) << "DataSenderManager::sendFragment: Sending fragment with seqId " << seqID << " to destination " << dest;
503 
504  sts = destinations_[dest]->moveFragment(std::move(frag));
506  TLOG(TLVL_ERROR) << "sendFragment: Sending fragment " << seqID << " to destination "
507  << dest << " failed! Data has been lost!";
508 
509  //sendFragTo(std::move(frag), dest);
510  sent_frag_count_.incSlot(dest);
511  outsts = sts;
512  }
513  else if (!should_stop_)
514  TLOG(TLVL_ERROR) << "calcDest returned invalid destination rank " << dest << "! This event has been lost: " << seqID
515  << ". enabled_destinantions_.size()="<<enabled_destinations_.size();
516  }
517 
518  {
519  std::unique_lock<std::mutex> lck(routing_mutex_);
520  // while (routing_table_.size() > routing_table_max_size_)
521  // {
522  // routing_table_.erase(routing_table_.begin());
523  // }
524  if(routing_master_mode_ == detail::RoutingMasterMode::RouteBySequenceID && routing_table_.find(seqID) != routing_table_.end())
525  routing_table_.erase(routing_table_.find(seqID));
526  else if(routing_table_.find(sent_frag_count_.count()) != routing_table_.end())
527  routing_table_.erase(routing_table_.find(sent_frag_count_.count()));
528  }
529 
530 
531  auto delta_t = TimeUtils::GetElapsedTime(start_time);
532  destination_metric_data_[dest].first += fragSize;
533  destination_metric_data_[dest].second += delta_t;
534 
535  if (metricMan && TimeUtils::GetElapsedTime(destination_metric_send_time_[dest]) > 1)
536  {
537  TLOG(5) << "sendFragment: sending metrics";
538  metricMan->sendMetric("Data Send Time to Rank " + std::to_string(dest), destination_metric_data_[dest].second, "s", 5, MetricMode::Accumulate);
539  metricMan->sendMetric("Data Send Size to Rank " + std::to_string(dest), destination_metric_data_[dest].first, "B", 5, MetricMode::Accumulate);
540  metricMan->sendMetric("Data Send Rate to Rank " + std::to_string(dest), destination_metric_data_[dest].first / destination_metric_data_[dest].second, "B/s", 5, MetricMode::Average);
541  metricMan->sendMetric("Data Send Count to Rank " + std::to_string(dest), sent_frag_count_.slotCount(dest), "fragments", 3, MetricMode::LastPoint);
542 
543  destination_metric_send_time_[dest] = std::chrono::steady_clock::now();
544  destination_metric_data_[dest].first = 0;
545  destination_metric_data_[dest].second = 0.0;
546 
547  if (use_routing_master_)
548  {
549  metricMan->sendMetric("Routing Table Size", GetRoutingTableEntryCount(), "events", 2, MetricMode::LastPoint);
550  if (routing_wait_time_ > 0)
551  {
552  metricMan->sendMetric("Routing Wait Time", static_cast<double>(routing_wait_time_.load()) / 1000000, "s", 2, MetricMode::Average);
553  routing_wait_time_ = 0;
554  }
555  }
556  }
557  TLOG(5) << "sendFragment: Done sending fragment " << seqID;
558  return std::make_pair(dest, outsts);
559 } // artdaq::DataSenderManager::sendFragment
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to a in_addr suitable for socket communication.
Definition: TCPConnect.cc:33
A row of the Routing Table.
hostMap_t MakeHostMap(fhicl::ParameterSet pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
Definition: HostMap.hh:66
virtual ~DataSenderManager()
DataSenderManager Destructor.
A RoutingAckPacket contains the rank of the table receiver, plus the first and last sequence IDs in t...
Events should be routed by sequence ID (BR -> EB)
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
std::pair< int, TransferInterface::CopyStatus > sendFragment(Fragment &&frag)
Send the given Fragment. Return the rank of the destination to which the Fragment was sent...
size_t GetRemainingRoutingTableEntries() const
Gets the number of sends remaining in the routing table, in case other parts of the system want to us...
DataSenderManager(const fhicl::ParameterSet &ps)
DataSenderManager Constructor.
Fragment::sequence_id_t first_sequence_id
The first sequence ID in the received RoutingPacket.
The header of the Routing Table, containing the magic bytes and the number of entries.
std::vector< fhicl::ParameterSet > MakeHostMapPset(std::map< int, std::string > input)
Create a list of HostMap::HostConfig ParameterSets from a hostMap_t map
Definition: HostMap.hh:46
size_t GetRoutingTableEntryCount() const
Gets the current size of the Routing Table, in case other parts of the system want to use this inform...
uint32_t header
Magic bytes to make sure the packet wasn't garbled.
RoutingMasterMode mode
The current mode of the RoutingMaster.
This TransferInterface is a Sender.
Fragment::sequence_id_t last_sequence_id
The last sequence ID in the received RoutingPacket.
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
int rank
The rank from which the RoutingAckPacket came.
size_t nEntries
The number of RoutingPacketEntries in the RoutingPacket.
Some error occurred, but no exception was thrown.
The send operation completed successfully.
Value to be returned upon receive timeout.
std::map< int, std::string > hostMap_t
The host_map is a map associating ranks with artdaq::DestinationInfo objects.
Definition: HostMap.hh:39
Events should be routed by send count (EB -> Agg)
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.