artdaq  v3_12_02
Multicast_transfer.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_MulticastTransfer").c_str()
3 
4 #include "artdaq/TransferPlugins/TransferInterface.hh"
5 
6 #include "artdaq-core/Data/Fragment.hh"
7 #include "artdaq-core/Utilities/ExceptionHandler.hh"
8 
9 #include "cetlib_except/exception.h"
10 #include "fhiclcpp/ParameterSet.h"
11 
12 #include <boost/asio.hpp>
13 
14 #include <bitset>
15 #include <cassert>
16 #include <iostream>
17 #include <string>
18 #include <type_traits>
19 #include <vector>
20 
21 #pragma GCC diagnostic push
22 #pragma GCC diagnostic ignored "-Wunused-parameter"
23 
24 namespace artdaq {
29 {
30 public:
31  using byte_t = artdaq::Fragment::byte_t;
32 
36  ~MulticastTransfer() override = default;
37 
55  MulticastTransfer(fhicl::ParameterSet const& ps, Role role);
56 
63  int receiveFragment(artdaq::Fragment& fragment,
64  size_t receiveTimeout) override;
65 
72  int receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) override;
73 
80  int receiveFragmentData(RawDataType* destination, size_t wordCount) override;
81 
88  CopyStatus transfer_fragment_min_blocking_mode(artdaq::Fragment const& fragment, size_t send_timeout_usec) override;
89 
95  CopyStatus transfer_fragment_reliable_mode(artdaq::Fragment&& fragment) override;
96 
101  bool isRunning() override { return socket_ != nullptr; }
102 
107  void flush_buffers() override {}
108 
109 private:
110  MulticastTransfer(MulticastTransfer const&) = delete;
112  MulticastTransfer& operator=(MulticastTransfer const&) = delete;
113  MulticastTransfer& operator=(MulticastTransfer&&) = delete;
114 
115  void fill_staging_memory(const artdaq::Fragment& frag);
116 
117  template<typename T>
118  void book_container_of_buffers(std::vector<T>& buffers,
119  size_t fragment_size,
120  size_t total_subfragments,
121  size_t first_subfragment_num,
122  size_t last_subfragment_num);
123 
124  void get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size, size_t& fragment_size,
125  size_t& expected_subfragments);
126 
127  void set_receive_buffer_size(size_t recv_buff_size);
128 
129  class subfragment_identifier
130  {
131  public:
132  subfragment_identifier(size_t sequenceID, size_t fragmentID, size_t subfragment_number)
133  : sequenceID_(sequenceID)
134  , fragmentID_(fragmentID)
135  , subfragment_number_(subfragment_number) {}
136 
137  size_t sequenceID() const { return sequenceID_; }
138  size_t fragmentID() const { return fragmentID_; }
139  size_t subfragment_number() const { return subfragment_number_; }
140 
141  private:
142  size_t sequenceID_;
143  size_t fragmentID_;
144  size_t subfragment_number_;
145  };
146 
147  std::unique_ptr<boost::asio::io_service> io_service_;
148 
149  std::unique_ptr<boost::asio::ip::udp::endpoint> local_endpoint_;
150  std::unique_ptr<boost::asio::ip::udp::endpoint> multicast_endpoint_;
151  std::unique_ptr<boost::asio::ip::udp::endpoint> opposite_endpoint_;
152 
153  std::unique_ptr<boost::asio::ip::udp::socket> socket_;
154 
155  size_t subfragment_size_;
156  size_t subfragments_per_send_;
157 
158  size_t pause_on_copy_usecs_;
159  Fragment fragment_buffer_;
160 
161  std::vector<byte_t> staging_memory_;
162 
163  std::vector<boost::asio::mutable_buffer> receive_buffers_;
164 };
165 } // namespace artdaq
166 
167 artdaq::MulticastTransfer::MulticastTransfer(fhicl::ParameterSet const& pset, Role role)
168  : TransferInterface(pset, role)
169  , io_service_(std::make_unique<std::remove_reference<decltype(*io_service_)>::type>())
170  , local_endpoint_(nullptr)
171  , multicast_endpoint_(nullptr)
172  , opposite_endpoint_(std::make_unique<std::remove_reference<decltype(*opposite_endpoint_)>::type>())
173  , socket_(nullptr)
174  , subfragment_size_(pset.get<size_t>("subfragment_size"))
175  , subfragments_per_send_(pset.get<size_t>("subfragments_per_send"))
176  , pause_on_copy_usecs_(pset.get<size_t>("pause_on_copy_usecs", 0))
177 {
178  try
179  {
180  portMan->UpdateConfiguration(pset);
181  auto port = portMan->GetMulticastTransferPort(source_rank());
182  auto multicast_address = boost::asio::ip::address::from_string(portMan->GetMulticastTransferGroupAddress());
183  auto local_address = boost::asio::ip::address::from_string(pset.get<std::string>("local_address"));
184 
185  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "multicast address is set to " << multicast_address;
186  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "local address is set to " << local_address;
187 
189  {
190  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, 0);
191  multicast_endpoint_ = std::make_unique<std::remove_reference<decltype(*multicast_endpoint_)>::type>(multicast_address, port);
192 
193  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
194  multicast_endpoint_->protocol());
195  socket_->bind(*local_endpoint_);
196  }
197  else
198  { // TransferInterface::role() == Role::kReceive
199 
200  // Create the socket so that multiple may be bound to the same address.
201 
202  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, port);
203  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
204  local_endpoint_->protocol());
205 
206  boost::system::error_code ec;
207 
208  socket_->set_option(boost::asio::ip::udp::socket::reuse_address(true), ec);
209 
210  if (ec.value() != 0)
211  {
212  TLOG(TLVL_ERROR) << "boost::system::error_code with value " << ec << " was found in setting reuse_address option";
213  }
214 
215  set_receive_buffer_size(pset.get<size_t>("receive_buffer_size", 0));
216 
217  socket_->bind(boost::asio::ip::udp::endpoint(multicast_address, port));
218 
219  // Join the multicast group.
220 
221  socket_->set_option(boost::asio::ip::multicast::join_group(multicast_address), ec);
222 
223  if (ec.value() != 0)
224  {
225  TLOG(TLVL_ERROR) << "boost::system::error_code with value " << ec << " was found in attempt to join multicast group";
226  }
227  }
228  }
229  catch (...)
230  {
231  ExceptionHandler(ExceptionHandlerRethrow::yes, "Problem setting up the socket in MulticastTransfer");
232  }
233 
234  auto max_subfragments =
235  static_cast<size_t>(std::ceil(max_fragment_size_words_ / static_cast<float>(subfragment_size_)));
236 
237  staging_memory_.resize(max_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
238 
240  {
241  book_container_of_buffers(receive_buffers_, max_fragment_size_words_, max_subfragments, 0, max_subfragments - 1);
242  }
243 
244  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "max_subfragments is " << max_subfragments;
245  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "Staging buffer size is " << staging_memory_.size();
246 }
247 
248 #pragma GCC diagnostic push
249 #pragma GCC diagnostic ignored "-Wunused-variable"
250 
251 int artdaq::MulticastTransfer::receiveFragment(artdaq::Fragment& fragment,
252  size_t receiveTimeout)
253 {
254  assert(TransferInterface::role() == Role::kReceive);
255 
256  if (fragment.dataSizeBytes() > 0)
257  {
258  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::receiveFragmentFrom: " // NOLINT(cert-err60-cpp)
259  << "nonzero payload found in fragment passed as argument";
260  }
261 
262  static bool print_warning = true;
263 
264  if (print_warning)
265  {
266  TLOG(TLVL_WARNING) << "Please note that MulticastTransfer::receiveFragmentFrom does not use its receiveTimeout argument";
267  print_warning = false;
268  }
269 
270  fragment.resizeBytes(max_fragment_size_words_ - sizeof(artdaq::detail::RawFragmentHeader));
271 
272  static auto current_sequenceID = std::numeric_limits<Fragment::sequence_id_t>::max();
273  static auto current_fragmentID = std::numeric_limits<Fragment::fragment_id_t>::max();
274 
275  size_t fragment_size = 0;
276  size_t expected_subfragments = 0;
277  size_t current_subfragments = 0;
278  bool fragment_complete = false;
279  bool last_fragment_truncated = false;
280 
281  while (true)
282  {
283  auto bytes_received = socket_->receive_from(receive_buffers_, *opposite_endpoint_);
284 
285  size_t bytes_processed = 0;
286 
287  for (auto& buf : receive_buffers_)
288  {
289  auto buf_size = boost::asio::buffer_size(buf);
290  auto size_t_ptr = boost::asio::buffer_cast<const size_t*>(buf);
291  auto seqID = *size_t_ptr;
292  auto fragID = *(size_t_ptr + 1); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
293  auto subfragID = *(size_t_ptr + 2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
294 
295  if (seqID != current_sequenceID || fragID != current_fragmentID)
296  {
297  // JCF, Jun-22-2016
298  // Code currently operates under the assumption that all subfragments from the call are from the same fragment
299 
300  assert(bytes_processed == 0);
301 
302  if (current_subfragments < expected_subfragments)
303  {
304  last_fragment_truncated = true;
305 
306  if (expected_subfragments != std::numeric_limits<size_t>::max())
307  {
308  TLOG(TLVL_WARNING) << "Warning: only received " << current_subfragments << " subfragments for fragment with seqID = " << current_sequenceID << ", fragID = " << current_fragmentID << " (expected " << expected_subfragments << ")";
309  }
310  else
311  {
312  TLOG(TLVL_WARNING) << "Warning: only received " << current_subfragments << " subfragments for fragment with seqID = " << current_sequenceID << ", fragID = " << current_fragmentID << ", # of expected subfragments is unknown as fragment header was not received)";
313  }
314  }
315 
316  current_subfragments = 0;
317  fragment_size = std::numeric_limits<size_t>::max();
318  expected_subfragments = std::numeric_limits<size_t>::max();
319  current_sequenceID = seqID;
320  current_fragmentID = fragID;
321  }
322 
323  auto ptr_into_fragment = fragment.headerBeginBytes() + subfragID * subfragment_size_; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
324 
325  auto ptr_into_buffer = boost::asio::buffer_cast<const byte_t*>(buf) + sizeof(subfragment_identifier);
326 
327  std::copy(ptr_into_buffer, ptr_into_buffer + buf_size - sizeof(subfragment_identifier), ptr_into_fragment); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
328 
329  if (subfragID == 0)
330  {
331  if (buf_size >= sizeof(subfragment_identifier) + sizeof(artdaq::detail::RawFragmentHeader))
332  {
333  auto payload_size = std::numeric_limits<size_t>::max();
334  get_fragment_quantities(buf, payload_size, fragment_size, expected_subfragments);
335 
336  fragment.resizeBytes(payload_size);
337  }
338  else
339  {
340  throw cet::exception("MulticastTransfer") << "Buffer size is too small to completely contain an artdaq::Fragment header; " // NOLINT(cert-err60-cpp)
341  << "please increase the default size";
342  }
343  }
344 
345  current_subfragments++;
346 
347  if (current_subfragments == expected_subfragments)
348  {
349  fragment_complete = true;
350  }
351 
352  bytes_processed += buf_size;
353 
354  if (bytes_processed >= bytes_received)
355  {
356  break;
357  }
358  }
359 
360  if (last_fragment_truncated)
361  {
362  // JCF, 7-7-2017
363 
364  // Don't yet have code to handle the scenario where the set of
365  // subfragments received in the last iteration of the loop was
366  // its own complete fragment, but we know the previous fragment
367  // to be incomplete
368 
369  assert(!fragment_complete);
370  TLOG(TLVL_WARNING) << GetTraceName() << "Got an incomplete fragment";
372  }
373 
374  if (fragment_complete)
375  {
376  return source_rank();
377  }
378  }
379 
381 }
382 
383 #pragma GCC diagnostic pop
384 
385 int artdaq::MulticastTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout)
386 {
387  auto ret = receiveFragment(fragment_buffer_, receiveTimeout);
388  if (ret == source_rank())
389  {
390  header = *reinterpret_cast<detail::RawFragmentHeader*>(fragment_buffer_.headerAddress()); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
391  return source_rank();
392  }
393  return ret;
394 }
395 
396 int artdaq::MulticastTransfer::receiveFragmentData(RawDataType* destination, size_t wordCount)
397 {
398  if (fragment_buffer_.size() > detail::RawFragmentHeader::num_words())
399  {
400  auto dataSize = (fragment_buffer_.size() - detail::RawFragmentHeader::num_words()) * sizeof(RawDataType);
401  memcpy(destination, fragment_buffer_.headerAddress() + detail::RawFragmentHeader::num_words(), dataSize); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
402  return source_rank();
403  }
404  return RECV_TIMEOUT;
405 }
406 
407 // Reliable transport is undefined for multicast; just use copy
410 {
411  return transfer_fragment_min_blocking_mode(f, 100000000);
412 }
413 
416  size_t send_timeout_usec)
417 {
418  assert(TransferInterface::role() == Role::kSend);
419 
420  if (fragment.sizeBytes() > max_fragment_size_words_)
421  {
422  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::copyFragmentTo: " << fragment.sizeBytes() << " byte fragment exceeds max_fragment_size of " << max_fragment_size_words_; // NOLINT(cert-err60-cpp)
423  }
424 
425  static size_t ncalls = 1;
426  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
427 
428  ncalls++;
429 
430  fill_staging_memory(fragment);
431 
432  for (size_t batch_index = 0;; batch_index++)
433  {
434  auto first_subfragment = batch_index * subfragments_per_send_;
435  auto last_subfragment = (batch_index + 1) * subfragments_per_send_ >= num_subfragments ? num_subfragments - 1 : (batch_index + 1) * subfragments_per_send_ - 1;
436 
437  std::vector<boost::asio::const_buffer> buffers;
438 
439  book_container_of_buffers(buffers, fragment.sizeBytes(), num_subfragments, first_subfragment, last_subfragment);
440 
441  socket_->send_to(buffers, *multicast_endpoint_);
442 
443  usleep(pause_on_copy_usecs_);
444 
445  if (last_subfragment == num_subfragments - 1)
446  {
447  break;
448  }
449  }
450  return CopyStatus::kSuccess;
451 }
452 
453 #pragma GCC diagnostic push
454 #pragma GCC diagnostic ignored "-Wsign-compare"
455 
456 void artdaq::MulticastTransfer::fill_staging_memory(const artdaq::Fragment& fragment)
457 {
458  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
459  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "# of subfragments to use is " << num_subfragments;
460 
461  for (auto i_s = 0; i_s < num_subfragments; ++i_s)
462  {
463  auto staging_memory_copyto = &staging_memory_.at(i_s * (sizeof(subfragment_identifier) + subfragment_size_));
464 
465  subfragment_identifier sfi(fragment.sequenceID(), fragment.fragmentID(), i_s);
466 
467  std::copy(reinterpret_cast<byte_t*>(&sfi), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
468  reinterpret_cast<byte_t*>(&sfi) + sizeof(subfragment_identifier), // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic,cppcoreguidelines-pro-type-reinterpret-cast)
469  staging_memory_copyto);
470 
471  auto low_ptr_into_fragment = fragment.headerBeginBytes() + subfragment_size_ * i_s; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
472 
473  auto high_ptr_into_fragment = (i_s == num_subfragments - 1) ? fragment.dataEndBytes() : fragment.headerBeginBytes() + subfragment_size_ * (i_s + 1); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
474 
475  std::copy(low_ptr_into_fragment,
476  high_ptr_into_fragment,
477  staging_memory_copyto + sizeof(subfragment_identifier)); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
478  }
479 }
480 
481 #pragma GCC diagnostic pop
482 
483 // Note that book_container_of_buffers includes, rather than excludes,
484 // "last_subfragment_num"; in this regard it's different than the way
485 // STL functions receive iterators. Note also that the lowest possible
486 // value for "first_subfragment_num" is 0, not 1.
487 
488 template<typename T>
489 void artdaq::MulticastTransfer::book_container_of_buffers(std::vector<T>& buffers,
490  const size_t fragment_size,
491  const size_t total_subfragments,
492  const size_t first_subfragment_num,
493  const size_t last_subfragment_num)
494 {
495  assert(staging_memory_.size() >= total_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
496  assert(buffers.empty());
497  assert(last_subfragment_num < total_subfragments);
498 
499  for (auto i_f = first_subfragment_num; i_f <= last_subfragment_num; ++i_f)
500  {
501  auto bytes_to_store = (i_f == total_subfragments - 1) ? sizeof(subfragment_identifier) + (fragment_size - (total_subfragments - 1) * subfragment_size_) : sizeof(subfragment_identifier) + subfragment_size_;
502 
503  buffers.emplace_back(&staging_memory_.at(i_f * (sizeof(subfragment_identifier) + subfragment_size_)),
504  bytes_to_store);
505  }
506 }
507 
508 #pragma GCC diagnostic push // Needed since profile builds will ignore the assert
509 #pragma GCC diagnostic ignored "-Wunused-variable"
510 
511 void artdaq::MulticastTransfer::get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size,
512  size_t& fragment_size,
513  size_t& expected_subfragments)
514 {
515  auto* buffer_ptr = boost::asio::buffer_cast<byte_t*>(buf);
516 
517  auto subfragment_num = *(reinterpret_cast<size_t*>(buffer_ptr) + 2); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-bounds-pointer-arithmetic)
518 
519  assert(subfragment_num == 0);
520 
521  auto* header =
522  reinterpret_cast<artdaq::detail::RawFragmentHeader*>(buffer_ptr + sizeof(subfragment_identifier)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-bounds-pointer-arithmetic)
523 
524  fragment_size = header->word_count * sizeof(artdaq::RawDataType);
525 
526  auto metadata_size = header->metadata_word_count * sizeof(artdaq::RawDataType);
527  payload_size = fragment_size - metadata_size - artdaq::detail::RawFragmentHeader::num_words() * sizeof(artdaq::RawDataType);
528 
529  assert(fragment_size ==
530  artdaq::detail::RawFragmentHeader::num_words() * sizeof(artdaq::RawDataType) +
531  metadata_size +
532  payload_size);
533 
534  expected_subfragments = static_cast<size_t>(std::ceil(fragment_size / static_cast<float>(subfragment_size_)));
535 }
536 #pragma GCC diagnostic pop
537 
538 void artdaq::MulticastTransfer::set_receive_buffer_size(size_t recv_buff_size)
539 {
540  if (recv_buff_size == 0)
541  {
542  return;
543  }
544  boost::asio::socket_base::receive_buffer_size actual_recv_buff_size;
545  socket_->get_option(actual_recv_buff_size);
546 
547  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "Receive buffer size is currently " << actual_recv_buff_size.value() << " bytes, will try to change it to " << recv_buff_size;
548 
549  boost::asio::socket_base::receive_buffer_size recv_buff_option(recv_buff_size);
550 
551  boost::system::error_code ec;
552  socket_->set_option(recv_buff_option, ec);
553 
554  if (ec.value() != 0)
555  {
556  TLOG(TLVL_ERROR) << "boost::system::error_code with value " << ec << " was found in attempt to change receive buffer";
557  std::cerr << "boost::system::error_code with value " << ec << " was found in attempt to change receive buffer" << std::endl;
558  }
559 
560  socket_->get_option(actual_recv_buff_size);
561  TLOG(TLVL_DEBUG + 32) << GetTraceName() << "After attempted change, receive buffer size is now " << actual_recv_buff_size.value();
562 }
563 
564 #pragma GCC diagnostic pop
565 
566 DEFINE_ARTDAQ_TRANSFER(artdaq::MulticastTransfer)
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
int receiveFragment(artdaq::Fragment &fragment, size_t receiveTimeout) override
Receive a Fragment using Multicast.
void flush_buffers() override
Flush any in-flight data. This should be used by the receiver after the receive loop has ended...
Role role() const
Get the TransferInterface::Role of this TransferInterface.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
bool isRunning() override
Determine whether the TransferInterface plugin is able to send/receive data.
~MulticastTransfer() override=default
Default destructor.
CopyStatus transfer_fragment_min_blocking_mode(artdaq::Fragment const &fragment, size_t send_timeout_usec) override
Copy a Fragment to the destination. Multicast is always unreliable.
MulticastTransfer is a TransferInterface implementation plugin that transfers data using Multicast...
This TransferInterface is a Receiver.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
This TransferInterface is a Sender.
CopyStatus transfer_fragment_reliable_mode(artdaq::Fragment &&fragment) override
Move a Fragment to the destination. Multicast is always unreliable.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
MulticastTransfer(fhicl::ParameterSet const &ps, Role role)
MulticastTransfer Constructor.
This interface defines the functions used to transfer data between artdaq applications.
artdaq::Fragment::byte_t byte_t
Copy Fragment::byte_t into local scope.
Value to be returned upon receive timeout.
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
const size_t max_fragment_size_words_
The maximum size of the transferred Fragment objects, in artdaq::Fragment::RawDataType words...