artdaq  v3_05_00
DataSenderManager.cc
1 #define TRACE_NAME (app_name + "_DataSenderManager").c_str()
#include "artdaq/DAQdata/Globals.hh"
#include "artdaq/DAQrate/DataSenderManager.hh"
#include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
#include "artdaq/TransferPlugins/detail/HostMap.hh"

#include "canvas/Utilities/Exception.h"

#include <chrono>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
15 
16 artdaq::DataSenderManager::DataSenderManager(const fhicl::ParameterSet& pset)
17  : destinations_()
18  , destination_metric_data_()
19  , destination_metric_send_time_()
20  , enabled_destinations_()
21  , sent_frag_count_()
22  , broadcast_sends_(pset.get<bool>("broadcast_sends", false))
23  , non_blocking_mode_(pset.get<bool>("nonblocking_sends", false))
24  , send_timeout_us_(pset.get<size_t>("send_timeout_usec", 5000000))
25  , send_retry_count_(pset.get<size_t>("send_retry_count", 2))
26  , routing_master_mode_(detail::RoutingMasterMode::INVALID)
27  , should_stop_(false)
28  , ack_socket_(-1)
29  , table_socket_(-1)
30  , routing_table_last_(0)
31  , routing_table_max_size_(pset.get<size_t>("routing_table_max_size", 1000))
32  , highest_sequence_id_routed_(0)
33 {
34  TLOG(TLVL_DEBUG) << "Received pset: " << pset.to_string();
35 
36  // Validate parameters
37  if (send_timeout_us_ == 0) send_timeout_us_ = std::numeric_limits<size_t>::max();
38 
39  auto rmConfig = pset.get<fhicl::ParameterSet>("routing_table_config", fhicl::ParameterSet());
40  use_routing_master_ = rmConfig.get<bool>("use_routing_master", false);
41  table_port_ = rmConfig.get<int>("table_update_port", 35556);
42  table_address_ = rmConfig.get<std::string>("table_update_address", "227.128.12.28");
43  ack_port_ = rmConfig.get<int>("table_acknowledge_port", 35557);
44  ack_address_ = rmConfig.get<std::string>("routing_master_hostname", "localhost");
45  routing_timeout_ms_ = (rmConfig.get<int>("routing_timeout_ms", 1000));
46  routing_retry_count_ = rmConfig.get<int>("routing_retry_count", 5);
47 
48  hostMap_t host_map = MakeHostMap(pset);
49  size_t tcp_send_buffer_size = pset.get<size_t>("tcp_send_buffer_size", 0);
50  size_t max_fragment_size_words = pset.get<size_t>("max_fragment_size_words", 0);
51 
52  auto dests = pset.get<fhicl::ParameterSet>("destinations", fhicl::ParameterSet());
53  for (auto& d : dests.get_pset_names())
54  {
55  auto dest_pset = dests.get<fhicl::ParameterSet>(d);
56  host_map = MakeHostMap(dest_pset, host_map);
57  }
58  auto host_map_pset = MakeHostMapPset(host_map);
59  fhicl::ParameterSet dests_mod;
60  for (auto& d : dests.get_pset_names())
61  {
62  auto dest_pset = dests.get<fhicl::ParameterSet>(d);
63  dest_pset.erase("host_map");
64  dest_pset.put<std::vector<fhicl::ParameterSet>>("host_map", host_map_pset);
65 
66  if (tcp_send_buffer_size != 0 && !dest_pset.has_key("tcp_send_buffer_size"))
67  {
68  dest_pset.put<size_t>("tcp_send_buffer_size", tcp_send_buffer_size);
69  }
70  if (max_fragment_size_words != 0 && !dest_pset.has_key("max_fragment_size_words"))
71  {
72  dest_pset.put<size_t>("max_fragment_size_words", max_fragment_size_words);
73  }
74 
75  dests_mod.put<fhicl::ParameterSet>(d, dest_pset);
76  }
77 
78  for (auto& d : dests_mod.get_pset_names())
79  {
80  try
81  {
82  auto transfer = MakeTransferPlugin(dests_mod, d, TransferInterface::Role::kSend);
83  auto destination_rank = transfer->destination_rank();
84  destinations_.emplace(destination_rank, std::move(transfer));
85  destination_metric_data_[destination_rank] = std::pair<size_t, double>();
86  destination_metric_send_time_[destination_rank] = std::chrono::steady_clock::now();
87  }
88  catch (const std::invalid_argument&)
89  {
90  TLOG(TLVL_DEBUG) << "Invalid destination specification: " << d;
91  }
92  catch (const cet::exception& ex)
93  {
94  TLOG(TLVL_WARNING) << "Caught cet::exception: " << ex.what();
95  }
96  catch (...)
97  {
98  TLOG(TLVL_WARNING) << "Non-cet exception while setting up TransferPlugin: " << d << ".";
99  }
100  }
101  if (destinations_.size() == 0)
102  {
103  TLOG(TLVL_ERROR) << "No destinations specified!";
104  }
105  else
106  {
107  auto enabled_dests = pset.get<std::vector<size_t>>("enabled_destinations", std::vector<size_t>());
108  if (enabled_dests.size() == 0)
109  {
110  TLOG(TLVL_INFO) << "enabled_destinations not specified, assuming all destinations enabled.";
111  for (auto& d : destinations_)
112  {
113  enabled_destinations_.insert(d.first);
114  }
115  }
116  else
117  {
118  for (auto& d : enabled_dests)
119  {
120  enabled_destinations_.insert(d);
121  }
122  }
123  }
124  if (use_routing_master_) startTableReceiverThread_();
125 }
126 
128 {
129  TLOG(TLVL_DEBUG) << "Shutting down DataSenderManager BEGIN";
130  should_stop_ = true;
131  for (auto& dest : enabled_destinations_)
132  {
133  if (destinations_.count(dest))
134  {
135  auto sts = destinations_[dest]->transfer_fragment_reliable_mode(std::move(*Fragment::eodFrag(sent_frag_count_.slotCount(dest))));
136  if (sts != TransferInterface::CopyStatus::kSuccess) TLOG(TLVL_ERROR) << "Error sending EOD Fragment to sender rank " << dest;
137  // sendFragTo(std::move(*Fragment::eodFrag(nFragments)), dest, true);
138  }
139  }
140  if (routing_thread_.joinable()) routing_thread_.join();
141  TLOG(TLVL_DEBUG) << "Shutting down DataSenderManager END. Sent " << count() << " fragments.";
142 }
143 
144 
145 void artdaq::DataSenderManager::setupTableListener_()
146 {
147  int sts;
148  table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
149  if (table_socket_ < 0)
150  {
151  TLOG(TLVL_ERROR) << "Error creating socket for receiving table updates!";
152  exit(1);
153  }
154 
155  struct sockaddr_in si_me_request;
156 
157  int yes = 1;
158  if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
159  {
160  TLOG(TLVL_ERROR) << " Unable to enable port reuse on request socket";
161  exit(1);
162  }
163  memset(&si_me_request, 0, sizeof(si_me_request));
164  si_me_request.sin_family = AF_INET;
165  si_me_request.sin_port = htons(table_port_);
166  //si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
167  struct in_addr in_addr_s;
168  sts = inet_aton(table_address_.c_str(), &in_addr_s );
169  if (sts == 0)
170  {
171  TLOG(TLVL_ERROR) << "inet_aton says table_address " << table_address_ << " is invalid";
172  }
173  si_me_request.sin_addr.s_addr = in_addr_s.s_addr;
174  if (bind(table_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
175  {
176  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << table_port_;
177  exit(1);
178  }
179 
180  struct ip_mreq mreq;
181  sts = ResolveHost(table_address_.c_str(), mreq.imr_multiaddr);
182  if (sts == -1)
183  {
184  TLOG(TLVL_ERROR) << "Unable to resolve multicast address for table updates";
185  exit(1);
186  }
187  mreq.imr_interface.s_addr = htonl(INADDR_ANY);
188  if (setsockopt(table_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
189  {
190  TLOG(TLVL_ERROR) << "Unable to join multicast group";
191  exit(1);
192  }
193 }
194 void artdaq::DataSenderManager::startTableReceiverThread_()
195 {
196  if (routing_thread_.joinable()) routing_thread_.join();
197  TLOG(TLVL_INFO) << "Starting Routing Thread";
198  try {
199  routing_thread_ = boost::thread(&DataSenderManager::receiveTableUpdatesLoop_, this);
200  }
201  catch (const boost::exception& e)
202  {
203  TLOG(TLVL_ERROR) << "Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
204  std::cerr << "Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
205  exit(5);
206  }
207 }
208 void artdaq::DataSenderManager::receiveTableUpdatesLoop_()
209 {
210  while (true)
211  {
212  if (should_stop_)
213  {
214  TLOG(TLVL_DEBUG) << __func__ << ": should_stop is " << std::boolalpha << should_stop_ << ", stopping";
215  return;
216  }
217 
218  TLOG(TLVL_TRACE) << __func__ << ": Polling table socket for new routes";
219  if (table_socket_ == -1)
220  {
221  TLOG(TLVL_DEBUG) << __func__ << ": Opening table listener socket";
222  setupTableListener_();
223  }
224  if (table_socket_ == -1)
225  {
226  TLOG(TLVL_DEBUG) << __func__ << ": The listen socket was not opened successfully.";
227  return;
228  }
229  if (ack_socket_ == -1)
230  {
231  ack_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
232  auto sts = ResolveHost(ack_address_.c_str(), ack_port_, ack_addr_);
233  if (sts == -1)
234  {
235  TLOG(TLVL_ERROR) << __func__ << ": Unable to resolve routing_master_address";
236  exit(1);
237  }
238  TLOG(TLVL_DEBUG) << __func__ << ": Ack socket is fd " << ack_socket_;
239  }
240 
241  struct pollfd fd;
242  fd.fd = table_socket_;
243  fd.events = POLLIN | POLLPRI;
244 
245  auto res = poll(&fd, 1, 1000);
246  if (res > 0)
247  {
248  auto first = artdaq::Fragment::InvalidSequenceID;
249  auto last = artdaq::Fragment::InvalidSequenceID;
250  std::vector<uint8_t> buf(MAX_ROUTING_TABLE_SIZE);
252 
253  TLOG(TLVL_DEBUG) << __func__ << ": Going to receive RoutingPacketHeader";
254  struct sockaddr_in from;
255  socklen_t len=sizeof(from);
256  auto stss = recvfrom(table_socket_, &buf[0], MAX_ROUTING_TABLE_SIZE, 0, (struct sockaddr*)&from, &len );
257  TLOG(TLVL_DEBUG) << __func__ << ": Received " << stss << " bytes from " << inet_ntoa(from.sin_addr) << ":" << from.sin_port;
258 
259  if (stss > static_cast<ssize_t>(sizeof(hdr)))
260  {
261  memcpy(&hdr, &buf[0], sizeof(artdaq::detail::RoutingPacketHeader));
262  }
263  else
264  {
265  TLOG(TLVL_TRACE) << __func__ << ": Incorrect size received. Discarding.";
266  continue;
267  }
268 
269  TRACE(TLVL_DEBUG,"receiveTableUpdatesLoop_: Checking for valid header with nEntries=%lu headerData:0x%016lx%016lx",hdr.nEntries,((unsigned long*)&hdr)[0],((unsigned long*)&hdr)[1]);
270  if (hdr.header != ROUTING_MAGIC)
271  {
272  TLOG(TLVL_TRACE) << __func__ << ": non-RoutingPacket received. No ROUTING_MAGIC. size(bytes)="<<stss;
273  }
274  else
275  {
276  if (routing_master_mode_ != detail::RoutingMasterMode::INVALID && routing_master_mode_ != hdr.mode)
277  {
278  TLOG(TLVL_ERROR) << __func__ << ": Received table has different RoutingMasterMode than expected!";
279  exit(1);
280  }
281  routing_master_mode_ = hdr.mode;
282 
284  assert(static_cast<size_t>(stss) == sizeof(artdaq::detail::RoutingPacketHeader) + sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries);
285  memcpy(&buffer[0], &buf[sizeof(artdaq::detail::RoutingPacketHeader)], sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries);
286  TRACE(6,"receiveTableUpdatesLoop_: Received a packet of %ld bytes. 1st 16 bytes: 0x%016lx%016lx",stss,((unsigned long*)&buffer[0])[0],((unsigned long*)&buffer[0])[1]);
287 
288  first = buffer[0].sequence_id;
289  last = buffer[buffer.size() - 1].sequence_id;
290 
291  if (first + hdr.nEntries - 1 != last)
292  {
293  TLOG(TLVL_ERROR) << __func__ << ": Skipping this RoutingPacket because the first (" << first << ") and last (" << last << ") entries are inconsistent (sz=" << hdr.nEntries << ")!";
294  continue;
295  }
296  auto thisSeqID = first;
297 
298  {
299  std::unique_lock<std::mutex> lck(routing_mutex_);
300  if (routing_table_.count(last) == 0)
301  {
302  for (auto entry : buffer)
303  {
304  if (thisSeqID != entry.sequence_id)
305  {
306  TLOG(TLVL_ERROR) << __func__ << ": Aborting processing of this RoutingPacket because I encountered an inconsistent entry (seqid=" << entry.sequence_id << ", expected=" << thisSeqID << ")!";
307  last = thisSeqID - 1;
308  break;
309  }
310  thisSeqID++;
311  if (routing_table_.count(entry.sequence_id))
312  {
313  if (routing_table_[entry.sequence_id] != entry.destination_rank)
314  {
315  TLOG(TLVL_ERROR) << __func__ << ": Detected routing table corruption! Recevied update specifying that sequence ID " << entry.sequence_id
316  << " should go to rank " << entry.destination_rank << ", but I had already been told to send it to " << routing_table_[entry.sequence_id] << "!"
317  << " I will use the original value!";
318  }
319  continue;
320  }
321  if (entry.sequence_id < routing_table_last_) continue;
322  routing_table_[entry.sequence_id] = entry.destination_rank;
323  TLOG(TLVL_DEBUG) << __func__ << ": (my_rank=" << my_rank << ") received update: SeqID " << entry.sequence_id
324  << " -> Rank " << entry.destination_rank;
325  }
326  }
327 
328  TLOG(TLVL_DEBUG) << __func__ << ": There are now " << routing_table_.size() << " entries in the Routing Table";
329  if (routing_table_.size() > 0) TLOG(TLVL_DEBUG) << __func__ << ": Last routing table entry is seqID=" << routing_table_.rbegin()->first;
330 
331  auto counter = 0;
332  for (auto& entry : routing_table_)
333  {
334  TLOG(45) << "Routing Table Entry" << counter << ": " << entry.first << " -> " << entry.second;
335  counter++;
336  }
337  }
338 
340  ack.rank = my_rank;
341  ack.first_sequence_id = first;
342  ack.last_sequence_id = last;
343 
344  if (last > routing_table_last_) routing_table_last_ = last;
345 
346  TLOG(TLVL_DEBUG) << __func__ << ": Sending RoutingAckPacket with first= " << first << " and last= " << last << " to " << ack_address_ << ", port " << ack_port_ << " (my_rank = " << my_rank << ")";
347  sendto(ack_socket_, &ack, sizeof(artdaq::detail::RoutingAckPacket), 0, (struct sockaddr *)&ack_addr_, sizeof(ack_addr_));
348  }
349  }
350  }
351 }
352 
354 {
355  std::unique_lock<std::mutex> lck(routing_mutex_);
356  return routing_table_.size();
357 }
358 
360 {
361  std::unique_lock<std::mutex> lck(routing_mutex_);
362  // Find the distance from the next highest sequence ID to the end of the list
363  size_t dist = std::distance(routing_table_.upper_bound(highest_sequence_id_routed_), routing_table_.end());
364  return dist; // If dist == 1, there is one entry left.
365 }
366 
367 int artdaq::DataSenderManager::calcDest_(Fragment::sequence_id_t sequence_id) const
368 {
369  if (enabled_destinations_.size() == 0) return TransferInterface::RECV_TIMEOUT; // No destinations configured.
370  if (!use_routing_master_ && enabled_destinations_.size() == 1) return *enabled_destinations_.begin(); // Trivial case
371 
372  if (use_routing_master_)
373  {
374  auto start = std::chrono::steady_clock::now();
375  TLOG(15) << "calcDest_ use_routing_master check for routing info for seqID="<<sequence_id<<" routing_timeout_ms="<<routing_timeout_ms_<<" should_stop_="<<should_stop_;
376  while (!should_stop_ && (routing_timeout_ms_ <= 0 || TimeUtils::GetElapsedTimeMilliseconds(start) < static_cast<size_t>(routing_timeout_ms_)))
377  {
378  {
379  std::unique_lock<std::mutex> lck(routing_mutex_);
380  if (routing_master_mode_ == detail::RoutingMasterMode::RouteBySequenceID && routing_table_.count(sequence_id))
381  {
382  if (sequence_id > highest_sequence_id_routed_) highest_sequence_id_routed_ = sequence_id;
383  routing_wait_time_.fetch_add(TimeUtils::GetElapsedTimeMicroseconds(start));
384  return routing_table_.at(sequence_id);
385  }
386  else if (routing_master_mode_ == detail::RoutingMasterMode::RouteBySendCount && routing_table_.count(sent_frag_count_.count() + 1))
387  {
388  if (sent_frag_count_.count() + 1 > highest_sequence_id_routed_) highest_sequence_id_routed_ = sent_frag_count_.count() + 1;
389  routing_wait_time_.fetch_add(TimeUtils::GetElapsedTimeMicroseconds(start));
390  return routing_table_.at(sent_frag_count_.count() + 1);
391  }
392  }
393  usleep(routing_timeout_ms_ * 10);
394  }
395  routing_wait_time_.fetch_add(TimeUtils::GetElapsedTimeMicroseconds(start));
396  if (routing_master_mode_ == detail::RoutingMasterMode::RouteBySequenceID)
397  {
398  TLOG(TLVL_WARNING) << "Bad Omen: I don't have routing information for seqID " << sequence_id
399  << " and the Routing Master did not send a table update in routing_timeout_ms window (" << routing_timeout_ms_ << " ms)!";
400  }
401  else
402  {
403  TLOG(TLVL_WARNING) << "Bad Omen: I don't have routing information for send number " << sent_frag_count_.count()
404  << " and the Routing Master did not send a table update in routing_timeout_ms window (" << routing_timeout_ms_ << " ms)!";
405  }
406  }
407  else
408  {
409  auto index = sequence_id % enabled_destinations_.size();
410  auto it = enabled_destinations_.begin();
411  for (; index > 0; --index)
412  {
413  ++it;
414  if (it == enabled_destinations_.end()) it = enabled_destinations_.begin();
415  }
416  return *it;
417  }
419 }
420 
421 std::pair<int, artdaq::TransferInterface::CopyStatus> artdaq::DataSenderManager::sendFragment(Fragment&& frag)
422 {
423  // Precondition: Fragment must be complete and consistent (including
424  // header information).
425  auto start_time = std::chrono::steady_clock::now();
426  if (frag.type() == Fragment::EndOfDataFragmentType)
427  {
428  throw cet::exception("LogicError")
429  << "EOD fragments should not be sent on as received: "
430  << "use sendEODFrag() instead.";
431  }
432  size_t seqID = frag.sequenceID();
433  size_t fragSize = frag.sizeBytes();
434  TLOG(13) << "sendFragment start frag.fragmentHeader()=" << std::hex << (void*)(frag.headerBeginBytes()) << ", szB=" << std::dec << fragSize
435  << ", seqID=" << seqID << ", type=" << frag.typeString();
438  if (broadcast_sends_ || frag.type() == Fragment::EndOfRunFragmentType || frag.type() == Fragment::EndOfSubrunFragmentType || frag.type() == Fragment::InitFragmentType)
439  {
440  for (auto& bdest : enabled_destinations_)
441  {
442  TLOG(TLVL_TRACE) << "sendFragment: Sending fragment with seqId " << seqID << " to destination " << bdest << " (broadcast)";
443  // Gross, we have to copy.
445  size_t retries = 0; // Have NOT yet tried, so retries <= send_retry_count_ will have it RETRY send_retry_count_ times
446  while (sts == TransferInterface::CopyStatus::kTimeout && retries <= send_retry_count_)
447  {
448  if (!non_blocking_mode_)
449  {
450  sts = destinations_[bdest]->transfer_fragment_reliable_mode(Fragment(frag));
451  }
452  else
453  {
454  sts = destinations_[bdest]->transfer_fragment_min_blocking_mode(frag, send_timeout_us_);
455  }
456  ++retries;
457  }
458  if (sts != TransferInterface::CopyStatus::kSuccess) outsts = sts;
459  sent_frag_count_.incSlot(bdest);
460  }
461  }
462  else if (non_blocking_mode_)
463  {
464  auto count = routing_retry_count_;
465  while (dest == TransferInterface::RECV_TIMEOUT && count > 0)
466  {
467  dest = calcDest_(seqID);
469  {
470  count--;
471  TLOG(TLVL_WARNING) << "Could not get destination for seqID " << seqID << (count > 0 ? ", retrying." : ".");
472  }
473  }
474  if (dest != TransferInterface::RECV_TIMEOUT && destinations_.count(dest) && enabled_destinations_.count(dest))
475  {
476  TLOG(TLVL_TRACE) << "sendFragment: Sending fragment with seqId " << seqID << " to destination " << dest;
478  auto lastWarnTime = std::chrono::steady_clock::now();
479  size_t retries = 0; // Have NOT yet tried, so retries <= send_retry_count_ will have it RETRY send_retry_count_ times
480  while (sts != TransferInterface::CopyStatus::kSuccess && retries <= send_retry_count_)
481  {
482  sts = destinations_[dest]->transfer_fragment_min_blocking_mode(frag, send_timeout_us_);
483  if (sts != TransferInterface::CopyStatus::kSuccess && TimeUtils::GetElapsedTime(lastWarnTime) >= 1)
484  {
485  TLOG(TLVL_WARNING) << "sendFragment: Sending fragment " << seqID << " to destination " << dest << " failed! Retrying...";
486  lastWarnTime = std::chrono::steady_clock::now();
487  }
488  ++retries;
489  }
490  if (sts != TransferInterface::CopyStatus::kSuccess) outsts = sts;
491  //sendFragTo(std::move(frag), dest);
492  sent_frag_count_.incSlot(dest);
493  }
494  else if (!should_stop_)
495  TLOG(TLVL_ERROR) << "(in non_blocking) calcDest returned invalid destination rank " << dest << "! This event has been lost: " << seqID
496  << ". enabled_destinantions_.size()="<<enabled_destinations_.size();
497  }
498  else
499  {
500  auto start = std::chrono::steady_clock::now();
501  while (!should_stop_ && dest == TransferInterface::RECV_TIMEOUT)
502  {
503  dest = calcDest_(seqID);
505  {
506  TLOG(TLVL_WARNING) << "Could not get destination for seqID " << seqID << ", send number " << sent_frag_count_.count() << ", retrying. Waited " << TimeUtils::GetElapsedTime(start) << " s for routing information.";
507  usleep(10000);
508  }
509  }
510  if (dest != TransferInterface::RECV_TIMEOUT && destinations_.count(dest) && enabled_destinations_.count(dest))
511  {
512  TLOG(5) << "DataSenderManager::sendFragment: Sending fragment with seqId " << seqID << " to destination " << dest;
514 
515  sts = destinations_[dest]->transfer_fragment_reliable_mode(std::move(frag));
517  TLOG(TLVL_ERROR) << "sendFragment: Sending fragment " << seqID << " to destination "
518  << dest << " failed! Data has been lost!";
519 
520  //sendFragTo(std::move(frag), dest);
521  sent_frag_count_.incSlot(dest);
522  outsts = sts;
523  }
524  else if (!should_stop_)
525  TLOG(TLVL_ERROR) << "calcDest returned invalid destination rank " << dest << "! This event has been lost: " << seqID
526  << ". enabled_destinantions_.size()="<<enabled_destinations_.size();
527  }
528 
529  {
530  std::unique_lock<std::mutex> lck(routing_mutex_);
531  // while (routing_table_.size() > routing_table_max_size_)
532  // {
533  // routing_table_.erase(routing_table_.begin());
534  // }
535  if(routing_master_mode_ == detail::RoutingMasterMode::RouteBySequenceID && routing_table_.find(seqID) != routing_table_.end())
536  routing_table_.erase(routing_table_.find(seqID));
537  else if(routing_table_.find(sent_frag_count_.count()) != routing_table_.end())
538  routing_table_.erase(routing_table_.find(sent_frag_count_.count()));
539  }
540 
541 
542  auto delta_t = TimeUtils::GetElapsedTime(start_time);
543  destination_metric_data_[dest].first += fragSize;
544  destination_metric_data_[dest].second += delta_t;
545 
546  if (metricMan && TimeUtils::GetElapsedTime(destination_metric_send_time_[dest]) > 1)
547  {
548  TLOG(5) << "sendFragment: sending metrics";
549  metricMan->sendMetric("Data Send Time to Rank " + std::to_string(dest), destination_metric_data_[dest].second, "s", 5, MetricMode::Accumulate);
550  metricMan->sendMetric("Data Send Size to Rank " + std::to_string(dest), destination_metric_data_[dest].first, "B", 5, MetricMode::Accumulate);
551  metricMan->sendMetric("Data Send Rate to Rank " + std::to_string(dest), destination_metric_data_[dest].first / destination_metric_data_[dest].second, "B/s", 5, MetricMode::Average);
552  metricMan->sendMetric("Data Send Count to Rank " + std::to_string(dest), sent_frag_count_.slotCount(dest), "fragments", 3, MetricMode::LastPoint);
553 
554  destination_metric_send_time_[dest] = std::chrono::steady_clock::now();
555  destination_metric_data_[dest].first = 0;
556  destination_metric_data_[dest].second = 0.0;
557 
558  if (use_routing_master_)
559  {
560  metricMan->sendMetric("Routing Table Size", GetRoutingTableEntryCount(), "events", 2, MetricMode::LastPoint);
561  if (routing_wait_time_ > 0)
562  {
563  metricMan->sendMetric("Routing Wait Time", static_cast<double>(routing_wait_time_.load()) / 1000000, "s", 2, MetricMode::Average);
564  routing_wait_time_ = 0;
565  }
566  }
567  }
568  TLOG(5) << "sendFragment: Done sending fragment " << seqID << " to dest="<<dest;
569  return std::make_pair(dest, outsts);
570 } // artdaq::DataSenderManager::sendFragment
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to a in_addr suitable for socket communication.
Definition: TCPConnect.cc:33
A row of the Routing Table.
hostMap_t MakeHostMap(fhicl::ParameterSet pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
Definition: HostMap.hh:66
virtual ~DataSenderManager()
DataSenderManager Destructor.
A RoutingAckPacket contains the rank of the table receiver, plus the first and last sequence IDs in t...
Events should be routed by sequence ID (BR -> EB)
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, std::string plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
std::pair< int, TransferInterface::CopyStatus > sendFragment(Fragment &&frag)
Send the given Fragment. Return the rank of the destination to which the Fragment was sent...
size_t GetRemainingRoutingTableEntries() const
Gets the number of sends remaining in the routing table, in case other parts of the system want to us...
DataSenderManager(const fhicl::ParameterSet &ps)
DataSenderManager Constructor.
Fragment::sequence_id_t first_sequence_id
The first sequence ID in the received RoutingPacket.
The header of the Routing Table, containing the magic bytes and the number of entries.
std::vector< fhicl::ParameterSet > MakeHostMapPset(std::map< int, std::string > input)
Create a list of HostMap::HostConfig ParameterSets from a hostMap_t map
Definition: HostMap.hh:46
size_t GetRoutingTableEntryCount() const
Gets the current size of the Routing Table, in case other parts of the system want to use this inform...
uint32_t header
Magic bytes to make sure the packet wasn't garbled.
RoutingMasterMode mode
The current mode of the RoutingMaster.
This TransferInterface is a Sender.
Fragment::sequence_id_t last_sequence_id
The last sequence ID in the received RoutingPacket.
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
int rank
The rank from which the RoutingAckPacket came.
size_t nEntries
The number of RoutingPacketEntries in the RoutingPacket.
Some error occurred, but no exception was thrown.
The send operation completed successfully.
Value to be returned upon receive timeout.
std::map< int, std::string > hostMap_t
The host_map is a map associating ranks with artdaq::DestinationInfo objects.
Definition: HostMap.hh:39
Events should be routed by send count (EB -> Agg)
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.