artdaq  v3_00_01
Multicast_transfer.cc
1 #include "artdaq/TransferPlugins/TransferInterface.hh"
2 
3 #include "artdaq-core/Data/Fragment.hh"
4 #include "artdaq-core/Utilities/ExceptionHandler.hh"
5 
6 #include "fhiclcpp/ParameterSet.h"
7 #include "cetlib_except/exception.h"
8 
9 #include <boost/asio.hpp>
10 #include <boost/bind.hpp>
11 
12 #include <iostream>
13 #include <vector>
14 #include <cassert>
15 #include <string>
16 #include <type_traits>
17 #include <bitset>
18 
19 #pragma GCC diagnostic push
20 #pragma GCC diagnostic ignored "-Wunused-parameter"
21 
22 
23 namespace artdaq
24 {
29  {
30  public:
31 
 /// Copy Fragment::byte_t into local scope.
32  using byte_t = artdaq::Fragment::byte_t;
33 
 /// Default destructor.
37  virtual ~MulticastTransfer() = default;
38 
 /// MulticastTransfer Constructor.
 /// \param ps ParameterSet used to configure the transfer (socket addresses, port, subfragment sizing)
 /// \param role Whether this instance acts as the sender or the receiver
56  MulticastTransfer(fhicl::ParameterSet const& ps, Role role);
57 
 /// Receive a Fragment using Multicast.
 /// \param fragment Output Fragment; must arrive with an empty payload
 /// \param receiveTimeout Not used by this implementation (the function prints a notice saying so)
 /// \return source_rank() once a complete Fragment has been assembled
64  int receiveFragment(artdaq::Fragment& fragment,
65  size_t receiveTimeout) override;
66 
 /// Receive a Fragment Header from the transport mechanism.
 /// \param header Output header, copied from the internally buffered Fragment
 /// \param receiveTimeout Forwarded to receiveFragment
 /// \return Whatever receiveFragment returned
73  int receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) override;
74 
 /// Receive the body of a Fragment to the given destination pointer.
 /// \param destination Where the words following the header are copied
 /// \param wordCount Number of words the caller expects
 /// \return source_rank() if buffered data was copied, RECV_TIMEOUT otherwise
81  int receiveFragmentData(RawDataType* destination, size_t wordCount) override;
82 
 /// Copy a Fragment to the destination. Multicast is always unreliable.
 /// \param fragment Fragment to send
 /// \param send_timeout_usec Not referenced by the implementation
 /// \return CopyStatus::kSuccess
89  CopyStatus copyFragment(artdaq::Fragment& fragment,
90  size_t send_timeout_usec = std::numeric_limits<size_t>::max()) override;
91 
 /// Move a Fragment to the destination. Multicast is always unreliable.
 /// The implementation simply forwards to copyFragment.
98  CopyStatus moveFragment(artdaq::Fragment&& fragment,
99  size_t send_timeout_usec = std::numeric_limits<size_t>::max()) override;
100 
101  private:
102 
 /// Split a Fragment into subfragments and write each one, prefixed by its
 /// subfragment_identifier, into staging_memory_.
103  void fill_staging_memory(const artdaq::Fragment& frag);
104 
 /// Append to "buffers" one asio buffer per subfragment slot of staging_memory_,
 /// for subfragments first_subfragment_num through last_subfragment_num INCLUSIVE.
105  template <typename T>
106  void book_container_of_buffers(std::vector<T>& buffers,
107  const size_t fragment_size,
108  const size_t total_subfragments,
109  const size_t first_subfragment_num,
110  const size_t last_subfragment_num);
111 
 /// Read the RawFragmentHeader that follows the identifier in subfragment 0 and
 /// derive the payload size, total Fragment size (both in bytes) and subfragment count.
112  void get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size, size_t& fragment_size,
113  size_t& expected_subfragments);
114 
 /// Try to change the socket's receive buffer size; a value of 0 leaves it untouched.
115  void set_receive_buffer_size(size_t recv_buff_size);
116 
 /// Identifier written in front of every subfragment.
 ///
 /// The object's raw bytes are copied into the staging memory by
 /// fill_staging_memory and read back on the receiving side as three
 /// consecutive size_t values (sequence ID, fragment ID, subfragment number).
 /// The member order and sizeof() of this class are therefore part of the
 /// on-wire format; do not reorder or add members.
117  class subfragment_identifier
118  {
119  public:
120 
 /// Store the three identifying values.
121  subfragment_identifier(size_t sequenceID, size_t fragmentID, size_t subfragment_number) :
122  sequenceID_(sequenceID)
123  , fragmentID_(fragmentID)
124  , subfragment_number_(subfragment_number) { }
125 
 /// Sequence ID of the Fragment this subfragment belongs to.
126  size_t sequenceID() const { return sequenceID_; }
 /// Fragment ID of the Fragment this subfragment belongs to.
127  size_t fragmentID() const { return fragmentID_; }
 /// Zero-based index of this subfragment within its Fragment.
128  size_t subfragment_number() const { return subfragment_number_; }
129 
130  private:
131  size_t sequenceID_;
132  size_t fragmentID_;
133  size_t subfragment_number_;
134  };
135 
 // Owns the asio io_service the socket is created on.
136  std::unique_ptr<boost::asio::io_service> io_service_;
137 
 // local_endpoint_: endpoint built from the "local_address" parameter.
 // multicast_endpoint_: destination of send_to(); only created in the sender branch of the constructor.
 // opposite_endpoint_: passed to receive_from() as the sender-endpoint argument.
138  std::unique_ptr<boost::asio::ip::udp::endpoint> local_endpoint_;
139  std::unique_ptr<boost::asio::ip::udp::endpoint> multicast_endpoint_;
140  std::unique_ptr<boost::asio::ip::udp::endpoint> opposite_endpoint_;
141 
142  std::unique_ptr<boost::asio::ip::udp::socket> socket_;
143 
 // Bytes of Fragment data carried by one subfragment ("subfragment_size" parameter).
144  size_t subfragment_size_;
 // Maximum number of subfragments handed to a single send_to() call ("subfragments_per_send" parameter).
145  size_t subfragments_per_send_;
146 
 // Argument of the usleep() issued after every send_to() ("pause_on_copy_usecs" parameter, default 0).
147  size_t pause_on_copy_usecs_;
 // Fragment filled by receiveFragmentHeader and later read by receiveFragmentData.
148  Fragment fragment_buffer_;
149 
 // Contiguous slots of (subfragment_identifier + subfragment_size_) bytes each.
150  std::vector<byte_t> staging_memory_;
151 
 // asio buffers over the staging_memory_ slots, handed to receive_from().
152  std::vector<boost::asio::mutable_buffer> receive_buffers_;
153  };
154 }
155 
// Construct a MulticastTransfer.
//
// Parameters read from pset: "subfragment_size", "subfragments_per_send",
// "pause_on_copy_usecs" (default 0), "multicast_port", "multicast_address",
// "local_address" and, on the receiving branch, "receive_buffer_size" (default 0).
//
// Any exception thrown while the socket is being set up is passed to
// ExceptionHandler with ExceptionHandlerRethrow::yes. Failures reported through
// boost::system::error_code (reuse_address, join_group) are only printed to
// std::cerr; construction continues.
156 artdaq::MulticastTransfer::MulticastTransfer(fhicl::ParameterSet const& pset, Role role) :
157  TransferInterface(pset, role)
158  , io_service_(std::make_unique<std::remove_reference<decltype(*io_service_)>::type>())
159  , local_endpoint_(nullptr)
160  , multicast_endpoint_(nullptr)
161  , opposite_endpoint_(std::make_unique<std::remove_reference<decltype(*opposite_endpoint_)>::type>())
162  , socket_(nullptr)
163  , subfragment_size_(pset.get<size_t>("subfragment_size"))
164  , subfragments_per_send_(pset.get<size_t>("subfragments_per_send"))
165  , pause_on_copy_usecs_(pset.get<size_t>("pause_on_copy_usecs", 0))
166 {
167  try
168  {
169  auto port = pset.get<unsigned short>("multicast_port");
170  auto multicast_address = boost::asio::ip::address::from_string(pset.get<std::string>("multicast_address"));
171  auto local_address = boost::asio::ip::address::from_string(pset.get<std::string>("local_address"));
172 
173  TLOG_DEBUG(uniqueLabel()) << "multicast address is set to " << multicast_address << TLOG_ENDL;
174  TLOG_DEBUG(uniqueLabel()) << "local address is set to " << local_address << TLOG_ENDL;
175 
 // NOTE(review): listing line 176 is absent here. The comment on the `else`
 // below implies it is a test for the sending role -- confirm against the
 // upstream file.
 // This branch binds the socket to the local address with port 0 and
 // prepares the multicast destination endpoint.
177  {
178  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, 0);
179  multicast_endpoint_ = std::make_unique<std::remove_reference<decltype(*multicast_endpoint_)>::type>(multicast_address, port);
180 
181  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
182  multicast_endpoint_->protocol());
183  socket_->bind(*local_endpoint_);
184  }
185  else
186  { // TransferInterface::role() == Role::kReceive
187 
188  // Create the socket so that multiple may be bound to the same address.
189 
 // local_endpoint_ is only used for its protocol() here; the bind below
 // uses the multicast address and port instead.
190  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, port);
191  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
192  local_endpoint_->protocol());
193 
194  boost::system::error_code ec;
195 
196  socket_->set_option(boost::asio::ip::udp::socket::reuse_address(true), ec);
197 
198  if (ec != 0)
199  {
200  std::cerr << "boost::system::error_code with value " << ec << " was found in setting reuse_address option" << std::endl;
201  }
202 
 // A "receive_buffer_size" of 0 (the default) leaves the socket buffer unchanged.
203  set_receive_buffer_size(pset.get<size_t>("receive_buffer_size", 0));
204 
205  socket_->bind(boost::asio::ip::udp::endpoint(multicast_address, port));
206 
207  // Join the multicast group.
208 
209  socket_->set_option(boost::asio::ip::multicast::join_group(multicast_address), ec);
210 
 // NOTE(review): a failed join is only reported on std::cerr; the object
 // is still constructed.
211  if (ec != 0)
212  {
213  std::cerr << "boost::system::error_code with value " << ec << " was found in attempt to join multicast group" << std::endl;
214  }
215  }
216  }
217  catch (...)
218  {
219  ExceptionHandler(ExceptionHandlerRethrow::yes, "Problem setting up the socket in MulticastTransfer");
220  }
221 
 // Number of subfragment slots needed for the largest Fragment, rounded up.
 // The division is carried out in single-precision float.
 // NOTE(review): max_fragment_size_words_ is divided by subfragment_size_
 // directly, i.e. it is treated as being in the same unit -- confirm.
222  auto max_subfragments =
223  static_cast<size_t>(std::ceil(max_fragment_size_words_ / static_cast<float>(subfragment_size_)));
224 
 // One slot per subfragment: identifier followed by subfragment_size_ bytes.
225  staging_memory_.resize(max_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
226 
 // NOTE(review): listing line 227 is absent here; the condition guarding this
 // block is not visible. receive_buffers_ is only consumed by receiveFragment,
 // so it is presumably a receive-role test -- confirm.
228  {
229  book_container_of_buffers(receive_buffers_, max_fragment_size_words_, max_subfragments, 0, max_subfragments - 1);
230  }
231 
232  TLOG_DEBUG(uniqueLabel()) << "max_subfragments is " << max_subfragments << TLOG_ENDL;
233  TLOG_DEBUG(uniqueLabel()) << "Staging buffer size is " << staging_memory_.size() << TLOG_ENDL;
234 }
235 
236 #pragma GCC diagnostic push
237 #pragma GCC diagnostic ignored "-Wunused-variable"
238 
// Receive a Fragment using Multicast.
//
// Repeatedly calls receive_from() on the socket with receive_buffers_ and
// copies each subfragment's bytes into `fragment` at the offset given by the
// subfragment number, until the number of subfragments seen equals the number
// computed from the Fragment header carried by subfragment 0.
//
// fragment        Output; must have an empty payload on entry, otherwise a
//                 cet::exception is thrown.
// receiveTimeout  Not referenced anywhere in the body.
// Returns source_rank() when the Fragment is judged complete.
239 int artdaq::MulticastTransfer::receiveFragment(artdaq::Fragment& fragment,
240  size_t receiveTimeout)
241 {
242  assert(TransferInterface::role() == Role::kReceive);
243 
244  if (fragment.dataSizeBytes() > 0)
245  {
246  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::receiveFragmentFrom: " <<
247  "nonzero payload found in fragment passed as argument";
248  }
249 
 // Function-local static: the notice is printed once, on the first call only.
250  static bool print_warning = true;
251 
252  if (print_warning)
253  {
254  std::cerr << "Please note that MulticastTransfer::receiveFragmentFrom does not use its receiveTimeout argument" << std::endl;
255  print_warning = false;
256  }
257 
 // Grow the payload before the header is known; it is resized again once
 // subfragment 0 has been parsed.
 // NOTE(review): a value named "..._words_" is combined with a byte count
 // here -- confirm the intended unit.
258  fragment.resizeBytes(max_fragment_size_words_ - sizeof(artdaq::detail::RawFragmentHeader));
259 
 // Function-local statics: the IDs of the Fragment currently being assembled
 // persist across calls.
260  static auto current_sequenceID = std::numeric_limits<Fragment::sequence_id_t>::max();
261  static auto current_fragmentID = std::numeric_limits<Fragment::fragment_id_t>::max();
262 
263  size_t fragment_size = 0;
264  size_t expected_subfragments = 0;
265  size_t current_subfragments = 0;
266  bool fragment_complete = false;
267  bool last_fragment_truncated = false;
268 
269  while (true)
270  {
271  auto bytes_received = socket_->receive_from(receive_buffers_, *opposite_endpoint_);
272 
273  size_t bytes_processed = 0;
274 
 // Walk the booked buffers in order until the bytes accounted for reach
 // the number of bytes the call returned.
275  for (auto& buf : receive_buffers_)
276  {
277  auto buf_size = boost::asio::buffer_size(buf);
 // The first three size_t values of each buffer are the
 // subfragment_identifier: sequence ID, fragment ID, subfragment number.
278  auto size_t_ptr = boost::asio::buffer_cast<const size_t*>(buf);
279  auto seqID = *size_t_ptr;
280  auto fragID = *(size_t_ptr + 1);
281  auto subfragID = *(size_t_ptr + 2);
282 
 // A change of IDs means a new Fragment has started.
283  if (seqID != current_sequenceID || fragID != current_fragmentID)
284  {
285  // JCF, Jun-22-2016
286  // Code currently operates under the assumption that all subfragments from the call are from the same fragment
287 
288  assert(bytes_processed == 0);
289 
 // Fewer subfragments than expected were seen for the previous Fragment.
290  if (current_subfragments < expected_subfragments)
291  {
292  last_fragment_truncated = true;
293 
 // size_t max is the "header not yet seen" marker assigned below.
294  if (expected_subfragments != std::numeric_limits<size_t>::max())
295  {
296  std::cerr << "Warning: only received " << current_subfragments << " subfragments for fragment with seqID = " <<
297  current_sequenceID << ", fragID = " << current_fragmentID << " (expected " << expected_subfragments << ")\n"
298  << std::endl;
299  }
300  else
301  {
302  std::cerr << "Warning: only received " << current_subfragments <<
303  " subfragments for fragment with seqID = " <<
304  current_sequenceID << ", fragID = " << current_fragmentID <<
305  ", # of expected subfragments is unknown as fragment header was not received)\n"
306  << std::endl;
307  }
308  }
309 
 // Reset the bookkeeping for the new Fragment.
310  current_subfragments = 0;
311  fragment_size = std::numeric_limits<size_t>::max();
312  expected_subfragments = std::numeric_limits<size_t>::max();
313  current_sequenceID = seqID;
314  current_fragmentID = fragID;
315  }
316 
 // Destination offset is derived from the subfragment number read out of
 // the buffer.
 // NOTE(review): subfragID is not range-checked before it is used as an
 // offset into the Fragment.
317  auto ptr_into_fragment = fragment.headerBeginBytes() + subfragID * subfragment_size_;
318 
319  auto ptr_into_buffer = boost::asio::buffer_cast<const byte_t*>(buf) + sizeof(subfragment_identifier);
320 
 // Copies the whole booked buffer (minus the identifier), regardless of how
 // many bytes were actually received into it.
321  std::copy(ptr_into_buffer, ptr_into_buffer + buf_size - sizeof(subfragment_identifier), ptr_into_fragment);
322 
 // Subfragment 0 starts with the Fragment header, which yields the sizes.
323  if (subfragID == 0)
324  {
325  if (buf_size >= sizeof(subfragment_identifier) + sizeof(artdaq::detail::RawFragmentHeader))
326  {
327  auto payload_size = std::numeric_limits<size_t>::max();
328  get_fragment_quantities(buf, payload_size, fragment_size, expected_subfragments);
329 
330  fragment.resizeBytes(payload_size);
331  }
332  else
333  {
334  throw cet::exception("MulticastTransfer") << "Buffer size is too small to completely contain an artdaq::Fragment header; " <<
335  "please increase the default size";
336  }
337  }
338 
339  current_subfragments++;
340 
341  if (current_subfragments == expected_subfragments)
342  {
343  fragment_complete = true;
344  }
345 
346  bytes_processed += buf_size;
347 
348  if (bytes_processed >= bytes_received)
349  {
350  break;
351  }
352  }
353 
354  if (last_fragment_truncated)
355  {
356  // JCF, 7-7-2017
357 
358  // Don't yet have code to handle the scenario where the set of
359  // subfragments received in the last iteration of the loop was
360  // its own complete fragment, but we know the previous fragment
361  // to be incomplete
362 
363  assert(!fragment_complete);
364  TLOG_WARNING(uniqueLabel()) << "Got an incomplete fragment" << TLOG_ENDL;
 // NOTE(review): listing line 365 is absent here; whatever statement
 // follows the warning is not visible in this view.
366  }
367 
368  if (fragment_complete)
369  {
370  return source_rank();
371  }
372  }
373 
 // NOTE(review): listing line 374 is absent here.
375 }
376 
377 #pragma GCC diagnostic pop
378 
379 int artdaq::MulticastTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout)
380 {
381  auto ret = receiveFragment(fragment_buffer_, receiveTimeout);
382  if (ret == source_rank())
383  {
384  header = *reinterpret_cast<detail::RawFragmentHeader*>(fragment_buffer_.headerAddress());
385  return source_rank();
386  }
387  return ret;
388 }
389 
390 int artdaq::MulticastTransfer::receiveFragmentData(RawDataType* destination, size_t wordCount)
391 {
392  if (fragment_buffer_.size() > detail::RawFragmentHeader::num_words()) {
393  auto dataSize = (fragment_buffer_.size() - detail::RawFragmentHeader::num_words()) * sizeof(RawDataType);
394  memcpy(destination, fragment_buffer_.headerAddress() + detail::RawFragmentHeader::num_words(), dataSize);
395  return source_rank();
396  }
397  return RECV_TIMEOUT;
398 }
399 
400 
401 // Reliable transport is undefined for multicast; just use copy
403 artdaq::MulticastTransfer::moveFragment(artdaq::Fragment&& f, size_t tmo)
404 {
405  return copyFragment(f, tmo);
406 }
407 
409 artdaq::MulticastTransfer::copyFragment(artdaq::Fragment& fragment,
410  size_t send_timeout_usec)
411 {
412  assert(TransferInterface::role() == Role::kSend);
413 
414  if (fragment.sizeBytes() > max_fragment_size_words_)
415  {
416  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::copyFragmentTo: " <<
417  fragment.sizeBytes() << " byte fragment exceeds max_fragment_size of " << max_fragment_size_words_;
418  }
419 
420  static size_t ncalls = 1;
421  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
422 
423  ncalls++;
424 
425  fill_staging_memory(fragment);
426 
427  for (size_t batch_index = 0; ; batch_index++)
428  {
429  auto first_subfragment = batch_index * subfragments_per_send_;
430  auto last_subfragment = (batch_index + 1) * subfragments_per_send_ >= num_subfragments ?
431  num_subfragments - 1 :
432  (batch_index + 1) * subfragments_per_send_ - 1;
433 
434  std::vector<boost::asio::const_buffer> buffers;
435 
436  book_container_of_buffers(buffers, fragment.sizeBytes(), num_subfragments, first_subfragment, last_subfragment);
437 
438  socket_->send_to(buffers, *multicast_endpoint_);
439 
440  usleep(pause_on_copy_usecs_);
441 
442  if (last_subfragment == num_subfragments - 1)
443  {
444  break;
445  }
446  }
447  return CopyStatus::kSuccess;
448 }
449 
450 #pragma GCC diagnostic push
451 #pragma GCC diagnostic ignored "-Wsign-compare"
452 
453 void artdaq::MulticastTransfer::fill_staging_memory(const artdaq::Fragment& fragment)
454 {
455  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
456  TLOG_DEBUG(uniqueLabel()) << "# of subfragments to use is " << num_subfragments << TLOG_ENDL;
457 
458  for (auto i_s = 0; i_s < num_subfragments; ++i_s)
459  {
460  auto staging_memory_copyto = &staging_memory_.at(i_s * (sizeof(subfragment_identifier) + subfragment_size_));
461 
462  subfragment_identifier sfi(fragment.sequenceID(), fragment.fragmentID(), i_s);
463 
464  std::copy(reinterpret_cast<byte_t*>(&sfi),
465  reinterpret_cast<byte_t*>(&sfi) + sizeof(subfragment_identifier),
466  staging_memory_copyto);
467 
468  auto low_ptr_into_fragment = fragment.headerBeginBytes() + subfragment_size_ * i_s;
469 
470  auto high_ptr_into_fragment = (i_s == num_subfragments - 1) ?
471  fragment.dataEndBytes() :
472  fragment.headerBeginBytes() + subfragment_size_ * (i_s + 1);
473 
474  std::copy(low_ptr_into_fragment,
475  high_ptr_into_fragment,
476  staging_memory_copyto + sizeof(subfragment_identifier));
477  }
478 }
479 
480 #pragma GCC diagnostic pop
481 
482 // Note that book_container_of_buffers includes, rather than excludes,
483 // "last_subfragment_num"; in this regard it's different than the way
484 // STL functions receive iterators. Note also that the lowest possible
485 // value for "first_subfragment_num" is 0, not 1.
486 
487 template <typename T>
488 void artdaq::MulticastTransfer::book_container_of_buffers(std::vector<T>& buffers,
489  const size_t fragment_size,
490  const size_t total_subfragments,
491  const size_t first_subfragment_num,
492  const size_t last_subfragment_num)
493 {
494  assert(staging_memory_.size() >= total_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
495  assert(buffers.size() == 0);
496  assert(last_subfragment_num < total_subfragments);
497 
498  for (auto i_f = first_subfragment_num; i_f <= last_subfragment_num; ++i_f)
499  {
500  auto bytes_to_store = (i_f == total_subfragments - 1) ?
501  sizeof(subfragment_identifier) + (fragment_size - (total_subfragments - 1) * subfragment_size_) :
502  sizeof(subfragment_identifier) + subfragment_size_;
503 
504  buffers.emplace_back(&staging_memory_.at(i_f * (sizeof(subfragment_identifier) + subfragment_size_)),
505  bytes_to_store);
506  }
507 }
508 
509 
510 #pragma GCC diagnostic push // Needed since profile builds will ignore the assert
511 #pragma GCC diagnostic ignored "-Wunused-variable"
512 
513 void artdaq::MulticastTransfer::get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size,
514  size_t& fragment_size,
515  size_t& expected_subfragments)
516 {
517  byte_t* buffer_ptr = boost::asio::buffer_cast<byte_t*>(buf);
518 
519  auto subfragment_num = *(reinterpret_cast<size_t*>(buffer_ptr) + 2);
520 
521  assert(subfragment_num == 0);
522 
523  artdaq::detail::RawFragmentHeader* header =
524  reinterpret_cast<artdaq::detail::RawFragmentHeader*>(buffer_ptr + sizeof(subfragment_identifier));
525 
526  fragment_size = header->word_count * sizeof(artdaq::RawDataType);
527 
528  auto metadata_size = header->metadata_word_count * sizeof(artdaq::RawDataType);
529  payload_size = fragment_size - metadata_size - artdaq::detail::RawFragmentHeader::num_words() *
530  sizeof(artdaq::RawDataType);
531 
532  assert(fragment_size ==
533  artdaq::detail::RawFragmentHeader::num_words() * sizeof(artdaq::RawDataType) +
534  metadata_size +
535  payload_size);
536 
537  expected_subfragments = static_cast<size_t>(std::ceil(fragment_size / static_cast<float>(subfragment_size_)));
538 }
539 #pragma GCC diagnostic pop
540 
541 void artdaq::MulticastTransfer::set_receive_buffer_size(size_t recv_buff_size)
542 {
543  if (recv_buff_size == 0) return;
544  boost::asio::socket_base::receive_buffer_size actual_recv_buff_size;
545  socket_->get_option(actual_recv_buff_size);
546 
547  TLOG_DEBUG(uniqueLabel()) << "Receive buffer size is currently " << actual_recv_buff_size.value() <<
548  " bytes, will try to change it to " << recv_buff_size << TLOG_ENDL;
549 
550  boost::asio::socket_base::receive_buffer_size recv_buff_option(recv_buff_size);
551 
552  boost::system::error_code ec;
553  socket_->set_option(recv_buff_option, ec);
554 
555  if (ec != 0)
556  {
557  std::cerr << "boost::system::error_code with value " << ec <<
558  " was found in attempt to change receive buffer" << std::endl;
559  }
560 
561  socket_->get_option(actual_recv_buff_size);
562  TLOG_DEBUG(uniqueLabel()) << "After attempted change, receive buffer size is now " << actual_recv_buff_size.value() << TLOG_ENDL;
563 }
564 
565 #pragma GCC diagnostic pop
566 
567 DEFINE_ARTDAQ_TRANSFER(artdaq::MulticastTransfer)
int receiveFragment(artdaq::Fragment &fragment, size_t receiveTimeout) override
Receive a Fragment using Multicast.
Role role() const
Get the TransferInterface::Role of this TransferInterface.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
MulticastTransfer is a TransferInterface implementation plugin that transfers data using Multicast...
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
This TransferInterface is a Receiver.
CopyStatus copyFragment(artdaq::Fragment &fragment, size_t send_timeout_usec=std::numeric_limits< size_t >::max()) override
Copy a Fragment to the destination. Multicast is always unreliable.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
This TransferInterface is a Sender.
virtual ~MulticastTransfer()=default
Default destructor.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
MulticastTransfer(fhicl::ParameterSet const &ps, Role role)
MulticastTransfer Constructor.
std::string uniqueLabel() const
Get the unique label of this TransferInterface instance.
This interface defines the functions used to transfer data between artdaq applications.
artdaq::Fragment::byte_t byte_t
Copy Fragment::byte_t into local scope.
CopyStatus moveFragment(artdaq::Fragment &&fragment, size_t send_timeout_usec=std::numeric_limits< size_t >::max()) override
Move a Fragment to the destination. Multicast is always unreliable.
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
const size_t max_fragment_size_words_
The maximum size of the transferred Fragment objects, in artdaq::Fragment::RawDataType words...