artdaq  v3_02_01
Multicast_transfer.cc
1 #define TRACE_NAME "MulticastTransfer"
2 
3 #include "artdaq/TransferPlugins/TransferInterface.hh"
4 
5 #include "artdaq-core/Data/Fragment.hh"
6 #include "artdaq-core/Utilities/ExceptionHandler.hh"
7 
8 #include "fhiclcpp/ParameterSet.h"
9 #include "cetlib_except/exception.h"
10 
11 #include <boost/asio.hpp>
12 #include <boost/bind.hpp>
13 
14 #include <iostream>
15 #include <vector>
16 #include <cassert>
17 #include <string>
18 #include <type_traits>
19 #include <bitset>
20 
21 #pragma GCC diagnostic push
22 #pragma GCC diagnostic ignored "-Wunused-parameter"
23 
24 
25 namespace artdaq
26 {
31  {
32  public:
33 
34  using byte_t = artdaq::Fragment::byte_t;
35 
39  virtual ~MulticastTransfer() = default;
40 
58  MulticastTransfer(fhicl::ParameterSet const& ps, Role role);
59 
66  int receiveFragment(artdaq::Fragment& fragment,
67  size_t receiveTimeout) override;
68 
75  int receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) override;
76 
83  int receiveFragmentData(RawDataType* destination, size_t wordCount) override;
84 
91  CopyStatus copyFragment(artdaq::Fragment& fragment, size_t send_timeout_usec) override;
92 
98  CopyStatus moveFragment(artdaq::Fragment&& fragment) override;
99 
104  bool isRunning() override { return socket_ != nullptr; }
105  private:
106 
107  void fill_staging_memory(const artdaq::Fragment& frag);
108 
109  template <typename T>
110  void book_container_of_buffers(std::vector<T>& buffers,
111  const size_t fragment_size,
112  const size_t total_subfragments,
113  const size_t first_subfragment_num,
114  const size_t last_subfragment_num);
115 
116  void get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size, size_t& fragment_size,
117  size_t& expected_subfragments);
118 
119  void set_receive_buffer_size(size_t recv_buff_size);
120 
121  class subfragment_identifier
122  {
123  public:
124 
125  subfragment_identifier(size_t sequenceID, size_t fragmentID, size_t subfragment_number) :
126  sequenceID_(sequenceID)
127  , fragmentID_(fragmentID)
128  , subfragment_number_(subfragment_number) { }
129 
130  size_t sequenceID() const { return sequenceID_; }
131  size_t fragmentID() const { return fragmentID_; }
132  size_t subfragment_number() const { return subfragment_number_; }
133 
134  private:
135  size_t sequenceID_;
136  size_t fragmentID_;
137  size_t subfragment_number_;
138  };
139 
140  std::unique_ptr<boost::asio::io_service> io_service_;
141 
142  std::unique_ptr<boost::asio::ip::udp::endpoint> local_endpoint_;
143  std::unique_ptr<boost::asio::ip::udp::endpoint> multicast_endpoint_;
144  std::unique_ptr<boost::asio::ip::udp::endpoint> opposite_endpoint_;
145 
146  std::unique_ptr<boost::asio::ip::udp::socket> socket_;
147 
148  size_t subfragment_size_;
149  size_t subfragments_per_send_;
150 
151  size_t pause_on_copy_usecs_;
152  Fragment fragment_buffer_;
153 
154  std::vector<byte_t> staging_memory_;
155 
156  std::vector<boost::asio::mutable_buffer> receive_buffers_;
157  };
158 }
159 
160 artdaq::MulticastTransfer::MulticastTransfer(fhicl::ParameterSet const& pset, Role role) :
161  TransferInterface(pset, role)
162  , io_service_(std::make_unique<std::remove_reference<decltype(*io_service_)>::type>())
163  , local_endpoint_(nullptr)
164  , multicast_endpoint_(nullptr)
165  , opposite_endpoint_(std::make_unique<std::remove_reference<decltype(*opposite_endpoint_)>::type>())
166  , socket_(nullptr)
167  , subfragment_size_(pset.get<size_t>("subfragment_size"))
168  , subfragments_per_send_(pset.get<size_t>("subfragments_per_send"))
169  , pause_on_copy_usecs_(pset.get<size_t>("pause_on_copy_usecs", 0))
170 {
171  try
172  {
173  auto port = pset.get<unsigned short>("multicast_port");
174  auto multicast_address = boost::asio::ip::address::from_string(pset.get<std::string>("multicast_address"));
175  auto local_address = boost::asio::ip::address::from_string(pset.get<std::string>("local_address"));
176 
177  TLOG(TLVL_DEBUG) << GetTraceName() << ": multicast address is set to " << multicast_address ;
178  TLOG(TLVL_DEBUG) << GetTraceName() << ": local address is set to " << local_address ;
179 
181  {
182  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, 0);
183  multicast_endpoint_ = std::make_unique<std::remove_reference<decltype(*multicast_endpoint_)>::type>(multicast_address, port);
184 
185  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
186  multicast_endpoint_->protocol());
187  socket_->bind(*local_endpoint_);
188  }
189  else
190  { // TransferInterface::role() == Role::kReceive
191 
192  // Create the socket so that multiple may be bound to the same address.
193 
194  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, port);
195  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
196  local_endpoint_->protocol());
197 
198  boost::system::error_code ec;
199 
200  socket_->set_option(boost::asio::ip::udp::socket::reuse_address(true), ec);
201 
202  if (ec != 0)
203  {
204  std::cerr << "boost::system::error_code with value " << ec << " was found in setting reuse_address option" << std::endl;
205  }
206 
207  set_receive_buffer_size(pset.get<size_t>("receive_buffer_size", 0));
208 
209  socket_->bind(boost::asio::ip::udp::endpoint(multicast_address, port));
210 
211  // Join the multicast group.
212 
213  socket_->set_option(boost::asio::ip::multicast::join_group(multicast_address), ec);
214 
215  if (ec != 0)
216  {
217  std::cerr << "boost::system::error_code with value " << ec << " was found in attempt to join multicast group" << std::endl;
218  }
219  }
220  }
221  catch (...)
222  {
223  ExceptionHandler(ExceptionHandlerRethrow::yes, "Problem setting up the socket in MulticastTransfer");
224  }
225 
226  auto max_subfragments =
227  static_cast<size_t>(std::ceil(max_fragment_size_words_ / static_cast<float>(subfragment_size_)));
228 
229  staging_memory_.resize(max_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
230 
232  {
233  book_container_of_buffers(receive_buffers_, max_fragment_size_words_, max_subfragments, 0, max_subfragments - 1);
234  }
235 
236  TLOG(TLVL_DEBUG) << GetTraceName() << ": max_subfragments is " << max_subfragments ;
237  TLOG(TLVL_DEBUG) << GetTraceName() << ": Staging buffer size is " << staging_memory_.size() ;
238 }
239 
240 #pragma GCC diagnostic push
241 #pragma GCC diagnostic ignored "-Wunused-variable"
242 
243 int artdaq::MulticastTransfer::receiveFragment(artdaq::Fragment& fragment,
244  size_t receiveTimeout)
245 {
246  assert(TransferInterface::role() == Role::kReceive);
247 
248  if (fragment.dataSizeBytes() > 0)
249  {
250  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::receiveFragmentFrom: " <<
251  "nonzero payload found in fragment passed as argument";
252  }
253 
254  static bool print_warning = true;
255 
256  if (print_warning)
257  {
258  std::cerr << "Please note that MulticastTransfer::receiveFragmentFrom does not use its receiveTimeout argument" << std::endl;
259  print_warning = false;
260  }
261 
262  fragment.resizeBytes(max_fragment_size_words_ - sizeof(artdaq::detail::RawFragmentHeader));
263 
264  static auto current_sequenceID = std::numeric_limits<Fragment::sequence_id_t>::max();
265  static auto current_fragmentID = std::numeric_limits<Fragment::fragment_id_t>::max();
266 
267  size_t fragment_size = 0;
268  size_t expected_subfragments = 0;
269  size_t current_subfragments = 0;
270  bool fragment_complete = false;
271  bool last_fragment_truncated = false;
272 
273  while (true)
274  {
275  auto bytes_received = socket_->receive_from(receive_buffers_, *opposite_endpoint_);
276 
277  size_t bytes_processed = 0;
278 
279  for (auto& buf : receive_buffers_)
280  {
281  auto buf_size = boost::asio::buffer_size(buf);
282  auto size_t_ptr = boost::asio::buffer_cast<const size_t*>(buf);
283  auto seqID = *size_t_ptr;
284  auto fragID = *(size_t_ptr + 1);
285  auto subfragID = *(size_t_ptr + 2);
286 
287  if (seqID != current_sequenceID || fragID != current_fragmentID)
288  {
289  // JCF, Jun-22-2016
290  // Code currently operates under the assumption that all subfragments from the call are from the same fragment
291 
292  assert(bytes_processed == 0);
293 
294  if (current_subfragments < expected_subfragments)
295  {
296  last_fragment_truncated = true;
297 
298  if (expected_subfragments != std::numeric_limits<size_t>::max())
299  {
300  std::cerr << "Warning: only received " << current_subfragments << " subfragments for fragment with seqID = " <<
301  current_sequenceID << ", fragID = " << current_fragmentID << " (expected " << expected_subfragments << ")\n"
302  << std::endl;
303  }
304  else
305  {
306  std::cerr << "Warning: only received " << current_subfragments <<
307  " subfragments for fragment with seqID = " <<
308  current_sequenceID << ", fragID = " << current_fragmentID <<
309  ", # of expected subfragments is unknown as fragment header was not received)\n"
310  << std::endl;
311  }
312  }
313 
314  current_subfragments = 0;
315  fragment_size = std::numeric_limits<size_t>::max();
316  expected_subfragments = std::numeric_limits<size_t>::max();
317  current_sequenceID = seqID;
318  current_fragmentID = fragID;
319  }
320 
321  auto ptr_into_fragment = fragment.headerBeginBytes() + subfragID * subfragment_size_;
322 
323  auto ptr_into_buffer = boost::asio::buffer_cast<const byte_t*>(buf) + sizeof(subfragment_identifier);
324 
325  std::copy(ptr_into_buffer, ptr_into_buffer + buf_size - sizeof(subfragment_identifier), ptr_into_fragment);
326 
327  if (subfragID == 0)
328  {
329  if (buf_size >= sizeof(subfragment_identifier) + sizeof(artdaq::detail::RawFragmentHeader))
330  {
331  auto payload_size = std::numeric_limits<size_t>::max();
332  get_fragment_quantities(buf, payload_size, fragment_size, expected_subfragments);
333 
334  fragment.resizeBytes(payload_size);
335  }
336  else
337  {
338  throw cet::exception("MulticastTransfer") << "Buffer size is too small to completely contain an artdaq::Fragment header; " <<
339  "please increase the default size";
340  }
341  }
342 
343  current_subfragments++;
344 
345  if (current_subfragments == expected_subfragments)
346  {
347  fragment_complete = true;
348  }
349 
350  bytes_processed += buf_size;
351 
352  if (bytes_processed >= bytes_received)
353  {
354  break;
355  }
356  }
357 
358  if (last_fragment_truncated)
359  {
360  // JCF, 7-7-2017
361 
362  // Don't yet have code to handle the scenario where the set of
363  // subfragments received in the last iteration of the loop was
364  // its own complete fragment, but we know the previous fragment
365  // to be incomplete
366 
367  assert(!fragment_complete);
368  TLOG(TLVL_WARNING) << GetTraceName() << ": Got an incomplete fragment" ;
370  }
371 
372  if (fragment_complete)
373  {
374  return source_rank();
375  }
376  }
377 
379 }
380 
381 #pragma GCC diagnostic pop
382 
383 int artdaq::MulticastTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout)
384 {
385  auto ret = receiveFragment(fragment_buffer_, receiveTimeout);
386  if (ret == source_rank())
387  {
388  header = *reinterpret_cast<detail::RawFragmentHeader*>(fragment_buffer_.headerAddress());
389  return source_rank();
390  }
391  return ret;
392 }
393 
394 int artdaq::MulticastTransfer::receiveFragmentData(RawDataType* destination, size_t wordCount)
395 {
396  if (fragment_buffer_.size() > detail::RawFragmentHeader::num_words()) {
397  auto dataSize = (fragment_buffer_.size() - detail::RawFragmentHeader::num_words()) * sizeof(RawDataType);
398  memcpy(destination, fragment_buffer_.headerAddress() + detail::RawFragmentHeader::num_words(), dataSize);
399  return source_rank();
400  }
401  return RECV_TIMEOUT;
402 }
403 
404 
405 // Reliable transport is undefined for multicast; just use copy
408 {
409  return copyFragment(f, 100000000);
410 }
411 
413 artdaq::MulticastTransfer::copyFragment(artdaq::Fragment& fragment,
414  size_t send_timeout_usec)
415 {
416  assert(TransferInterface::role() == Role::kSend);
417 
418  if (fragment.sizeBytes() > max_fragment_size_words_)
419  {
420  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::copyFragmentTo: " <<
421  fragment.sizeBytes() << " byte fragment exceeds max_fragment_size of " << max_fragment_size_words_;
422  }
423 
424  static size_t ncalls = 1;
425  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
426 
427  ncalls++;
428 
429  fill_staging_memory(fragment);
430 
431  for (size_t batch_index = 0; ; batch_index++)
432  {
433  auto first_subfragment = batch_index * subfragments_per_send_;
434  auto last_subfragment = (batch_index + 1) * subfragments_per_send_ >= num_subfragments ?
435  num_subfragments - 1 :
436  (batch_index + 1) * subfragments_per_send_ - 1;
437 
438  std::vector<boost::asio::const_buffer> buffers;
439 
440  book_container_of_buffers(buffers, fragment.sizeBytes(), num_subfragments, first_subfragment, last_subfragment);
441 
442  socket_->send_to(buffers, *multicast_endpoint_);
443 
444  usleep(pause_on_copy_usecs_);
445 
446  if (last_subfragment == num_subfragments - 1)
447  {
448  break;
449  }
450  }
451  return CopyStatus::kSuccess;
452 }
453 
454 #pragma GCC diagnostic push
455 #pragma GCC diagnostic ignored "-Wsign-compare"
456 
457 void artdaq::MulticastTransfer::fill_staging_memory(const artdaq::Fragment& fragment)
458 {
459  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
460  TLOG(TLVL_DEBUG) << GetTraceName() << ": # of subfragments to use is " << num_subfragments ;
461 
462  for (auto i_s = 0; i_s < num_subfragments; ++i_s)
463  {
464  auto staging_memory_copyto = &staging_memory_.at(i_s * (sizeof(subfragment_identifier) + subfragment_size_));
465 
466  subfragment_identifier sfi(fragment.sequenceID(), fragment.fragmentID(), i_s);
467 
468  std::copy(reinterpret_cast<byte_t*>(&sfi),
469  reinterpret_cast<byte_t*>(&sfi) + sizeof(subfragment_identifier),
470  staging_memory_copyto);
471 
472  auto low_ptr_into_fragment = fragment.headerBeginBytes() + subfragment_size_ * i_s;
473 
474  auto high_ptr_into_fragment = (i_s == num_subfragments - 1) ?
475  fragment.dataEndBytes() :
476  fragment.headerBeginBytes() + subfragment_size_ * (i_s + 1);
477 
478  std::copy(low_ptr_into_fragment,
479  high_ptr_into_fragment,
480  staging_memory_copyto + sizeof(subfragment_identifier));
481  }
482 }
483 
484 #pragma GCC diagnostic pop
485 
486 // Note that book_container_of_buffers includes, rather than excludes,
487 // "last_subfragment_num"; in this regard it's different than the way
488 // STL functions receive iterators. Note also that the lowest possible
489 // value for "first_subfragment_num" is 0, not 1.
490 
491 template <typename T>
492 void artdaq::MulticastTransfer::book_container_of_buffers(std::vector<T>& buffers,
493  const size_t fragment_size,
494  const size_t total_subfragments,
495  const size_t first_subfragment_num,
496  const size_t last_subfragment_num)
497 {
498  assert(staging_memory_.size() >= total_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
499  assert(buffers.size() == 0);
500  assert(last_subfragment_num < total_subfragments);
501 
502  for (auto i_f = first_subfragment_num; i_f <= last_subfragment_num; ++i_f)
503  {
504  auto bytes_to_store = (i_f == total_subfragments - 1) ?
505  sizeof(subfragment_identifier) + (fragment_size - (total_subfragments - 1) * subfragment_size_) :
506  sizeof(subfragment_identifier) + subfragment_size_;
507 
508  buffers.emplace_back(&staging_memory_.at(i_f * (sizeof(subfragment_identifier) + subfragment_size_)),
509  bytes_to_store);
510  }
511 }
512 
513 
514 #pragma GCC diagnostic push // Needed since profile builds will ignore the assert
515 #pragma GCC diagnostic ignored "-Wunused-variable"
516 
517 void artdaq::MulticastTransfer::get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size,
518  size_t& fragment_size,
519  size_t& expected_subfragments)
520 {
521  byte_t* buffer_ptr = boost::asio::buffer_cast<byte_t*>(buf);
522 
523  auto subfragment_num = *(reinterpret_cast<size_t*>(buffer_ptr) + 2);
524 
525  assert(subfragment_num == 0);
526 
527  artdaq::detail::RawFragmentHeader* header =
528  reinterpret_cast<artdaq::detail::RawFragmentHeader*>(buffer_ptr + sizeof(subfragment_identifier));
529 
530  fragment_size = header->word_count * sizeof(artdaq::RawDataType);
531 
532  auto metadata_size = header->metadata_word_count * sizeof(artdaq::RawDataType);
533  payload_size = fragment_size - metadata_size - artdaq::detail::RawFragmentHeader::num_words() *
534  sizeof(artdaq::RawDataType);
535 
536  assert(fragment_size ==
537  artdaq::detail::RawFragmentHeader::num_words() * sizeof(artdaq::RawDataType) +
538  metadata_size +
539  payload_size);
540 
541  expected_subfragments = static_cast<size_t>(std::ceil(fragment_size / static_cast<float>(subfragment_size_)));
542 }
543 #pragma GCC diagnostic pop
544 
545 void artdaq::MulticastTransfer::set_receive_buffer_size(size_t recv_buff_size)
546 {
547  if (recv_buff_size == 0) return;
548  boost::asio::socket_base::receive_buffer_size actual_recv_buff_size;
549  socket_->get_option(actual_recv_buff_size);
550 
551  TLOG(TLVL_DEBUG) << GetTraceName() << ": Receive buffer size is currently " << actual_recv_buff_size.value() <<
552  " bytes, will try to change it to " << recv_buff_size ;
553 
554  boost::asio::socket_base::receive_buffer_size recv_buff_option(recv_buff_size);
555 
556  boost::system::error_code ec;
557  socket_->set_option(recv_buff_option, ec);
558 
559  if (ec != 0)
560  {
561  std::cerr << "boost::system::error_code with value " << ec <<
562  " was found in attempt to change receive buffer" << std::endl;
563  }
564 
565  socket_->get_option(actual_recv_buff_size);
566  TLOG(TLVL_DEBUG) << GetTraceName() << ": After attempted change, receive buffer size is now " << actual_recv_buff_size.value() ;
567 }
568 
569 #pragma GCC diagnostic pop
570 
571 DEFINE_ARTDAQ_TRANSFER(artdaq::MulticastTransfer)
int receiveFragment(artdaq::Fragment &fragment, size_t receiveTimeout) override
Receive a Fragment using Multicast.
Role role() const
Get the TransferInterface::Role of this TransferInterface.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
bool isRunning() override
Determine whether the TransferInterface plugin is able to send/receive data.
CopyStatus copyFragment(artdaq::Fragment &fragment, size_t send_timeout_usec) override
Copy a Fragment to the destination. Multicast is always unreliable.
MulticastTransfer is a TransferInterface implementation plugin that transfers data using Multicast...
This TransferInterface is a Receiver.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
This TransferInterface is a Sender.
virtual ~MulticastTransfer()=default
Default destructor.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
MulticastTransfer(fhicl::ParameterSet const &ps, Role role)
MulticastTransfer Constructor.
CopyStatus moveFragment(artdaq::Fragment &&fragment) override
Move a Fragment to the destination. Multicast is always unreliable.
This interface defines the functions used to transfer data between artdaq applications.
artdaq::Fragment::byte_t byte_t
Copy Fragment::byte_t into local scope.
Value to be returned upon receive timeout.
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
const size_t max_fragment_size_words_
The maximum size of the transferred Fragment objects, in artdaq::Fragment::RawDataType words...