artdaq  v3_03_00
Multicast_transfer.cc
1 #define TRACE_NAME (app_name + "_MulticastTransfer").c_str()
2 #include "artdaq/DAQdata/Globals.hh"
3 
4 #include "artdaq/TransferPlugins/TransferInterface.hh"
5 
6 #include "artdaq-core/Data/Fragment.hh"
7 #include "artdaq-core/Utilities/ExceptionHandler.hh"
8 
9 #include "fhiclcpp/ParameterSet.h"
10 #include "cetlib_except/exception.h"
11 
12 #include <boost/asio.hpp>
13 #include <boost/bind.hpp>
14 
15 #include <iostream>
16 #include <vector>
17 #include <cassert>
18 #include <string>
19 #include <type_traits>
20 #include <bitset>
21 
22 #pragma GCC diagnostic push
23 #pragma GCC diagnostic ignored "-Wunused-parameter"
24 
25 
26 namespace artdaq
27 {
32  {
33  public:
34 
35  using byte_t = artdaq::Fragment::byte_t;
36 
40  virtual ~MulticastTransfer() = default;
41 
59  MulticastTransfer(fhicl::ParameterSet const& ps, Role role);
60 
67  int receiveFragment(artdaq::Fragment& fragment,
68  size_t receiveTimeout) override;
69 
76  int receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) override;
77 
84  int receiveFragmentData(RawDataType* destination, size_t wordCount) override;
85 
92  CopyStatus copyFragment(artdaq::Fragment& fragment, size_t send_timeout_usec) override;
93 
99  CopyStatus moveFragment(artdaq::Fragment&& fragment) override;
100 
105  bool isRunning() override { return socket_ != nullptr; }
106  private:
107 
108  void fill_staging_memory(const artdaq::Fragment& frag);
109 
110  template <typename T>
111  void book_container_of_buffers(std::vector<T>& buffers,
112  const size_t fragment_size,
113  const size_t total_subfragments,
114  const size_t first_subfragment_num,
115  const size_t last_subfragment_num);
116 
117  void get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size, size_t& fragment_size,
118  size_t& expected_subfragments);
119 
120  void set_receive_buffer_size(size_t recv_buff_size);
121 
122  class subfragment_identifier
123  {
124  public:
125 
126  subfragment_identifier(size_t sequenceID, size_t fragmentID, size_t subfragment_number) :
127  sequenceID_(sequenceID)
128  , fragmentID_(fragmentID)
129  , subfragment_number_(subfragment_number) { }
130 
131  size_t sequenceID() const { return sequenceID_; }
132  size_t fragmentID() const { return fragmentID_; }
133  size_t subfragment_number() const { return subfragment_number_; }
134 
135  private:
136  size_t sequenceID_;
137  size_t fragmentID_;
138  size_t subfragment_number_;
139  };
140 
141  std::unique_ptr<boost::asio::io_service> io_service_;
142 
143  std::unique_ptr<boost::asio::ip::udp::endpoint> local_endpoint_;
144  std::unique_ptr<boost::asio::ip::udp::endpoint> multicast_endpoint_;
145  std::unique_ptr<boost::asio::ip::udp::endpoint> opposite_endpoint_;
146 
147  std::unique_ptr<boost::asio::ip::udp::socket> socket_;
148 
149  size_t subfragment_size_;
150  size_t subfragments_per_send_;
151 
152  size_t pause_on_copy_usecs_;
153  Fragment fragment_buffer_;
154 
155  std::vector<byte_t> staging_memory_;
156 
157  std::vector<boost::asio::mutable_buffer> receive_buffers_;
158  };
159 }
160 
161 artdaq::MulticastTransfer::MulticastTransfer(fhicl::ParameterSet const& pset, Role role) :
162  TransferInterface(pset, role)
163  , io_service_(std::make_unique<std::remove_reference<decltype(*io_service_)>::type>())
164  , local_endpoint_(nullptr)
165  , multicast_endpoint_(nullptr)
166  , opposite_endpoint_(std::make_unique<std::remove_reference<decltype(*opposite_endpoint_)>::type>())
167  , socket_(nullptr)
168  , subfragment_size_(pset.get<size_t>("subfragment_size"))
169  , subfragments_per_send_(pset.get<size_t>("subfragments_per_send"))
170  , pause_on_copy_usecs_(pset.get<size_t>("pause_on_copy_usecs", 0))
171 {
172  try
173  {
174  portMan->UpdateConfiguration(pset);
175  auto port = portMan->GetMulticastTransferPort(source_rank());
176  auto multicast_address = boost::asio::ip::address::from_string(portMan->GetMulticastTransferGroupAddress());
177  auto local_address = boost::asio::ip::address::from_string(pset.get<std::string>("local_address"));
178 
179  TLOG(TLVL_DEBUG) << GetTraceName() << ": multicast address is set to " << multicast_address ;
180  TLOG(TLVL_DEBUG) << GetTraceName() << ": local address is set to " << local_address ;
181 
183  {
184  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, 0);
185  multicast_endpoint_ = std::make_unique<std::remove_reference<decltype(*multicast_endpoint_)>::type>(multicast_address, port);
186 
187  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
188  multicast_endpoint_->protocol());
189  socket_->bind(*local_endpoint_);
190  }
191  else
192  { // TransferInterface::role() == Role::kReceive
193 
194  // Create the socket so that multiple may be bound to the same address.
195 
196  local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, port);
197  socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_,
198  local_endpoint_->protocol());
199 
200  boost::system::error_code ec;
201 
202  socket_->set_option(boost::asio::ip::udp::socket::reuse_address(true), ec);
203 
204  if (ec != 0)
205  {
206  std::cerr << "boost::system::error_code with value " << ec << " was found in setting reuse_address option" << std::endl;
207  }
208 
209  set_receive_buffer_size(pset.get<size_t>("receive_buffer_size", 0));
210 
211  socket_->bind(boost::asio::ip::udp::endpoint(multicast_address, port));
212 
213  // Join the multicast group.
214 
215  socket_->set_option(boost::asio::ip::multicast::join_group(multicast_address), ec);
216 
217  if (ec != 0)
218  {
219  std::cerr << "boost::system::error_code with value " << ec << " was found in attempt to join multicast group" << std::endl;
220  }
221  }
222  }
223  catch (...)
224  {
225  ExceptionHandler(ExceptionHandlerRethrow::yes, "Problem setting up the socket in MulticastTransfer");
226  }
227 
228  auto max_subfragments =
229  static_cast<size_t>(std::ceil(max_fragment_size_words_ / static_cast<float>(subfragment_size_)));
230 
231  staging_memory_.resize(max_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
232 
234  {
235  book_container_of_buffers(receive_buffers_, max_fragment_size_words_, max_subfragments, 0, max_subfragments - 1);
236  }
237 
238  TLOG(TLVL_DEBUG) << GetTraceName() << ": max_subfragments is " << max_subfragments ;
239  TLOG(TLVL_DEBUG) << GetTraceName() << ": Staging buffer size is " << staging_memory_.size() ;
240 }
241 
242 #pragma GCC diagnostic push
243 #pragma GCC diagnostic ignored "-Wunused-variable"
244 
245 int artdaq::MulticastTransfer::receiveFragment(artdaq::Fragment& fragment,
246  size_t receiveTimeout)
247 {
248  assert(TransferInterface::role() == Role::kReceive);
249 
250  if (fragment.dataSizeBytes() > 0)
251  {
252  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::receiveFragmentFrom: " <<
253  "nonzero payload found in fragment passed as argument";
254  }
255 
256  static bool print_warning = true;
257 
258  if (print_warning)
259  {
260  std::cerr << "Please note that MulticastTransfer::receiveFragmentFrom does not use its receiveTimeout argument" << std::endl;
261  print_warning = false;
262  }
263 
264  fragment.resizeBytes(max_fragment_size_words_ - sizeof(artdaq::detail::RawFragmentHeader));
265 
266  static auto current_sequenceID = std::numeric_limits<Fragment::sequence_id_t>::max();
267  static auto current_fragmentID = std::numeric_limits<Fragment::fragment_id_t>::max();
268 
269  size_t fragment_size = 0;
270  size_t expected_subfragments = 0;
271  size_t current_subfragments = 0;
272  bool fragment_complete = false;
273  bool last_fragment_truncated = false;
274 
275  while (true)
276  {
277  auto bytes_received = socket_->receive_from(receive_buffers_, *opposite_endpoint_);
278 
279  size_t bytes_processed = 0;
280 
281  for (auto& buf : receive_buffers_)
282  {
283  auto buf_size = boost::asio::buffer_size(buf);
284  auto size_t_ptr = boost::asio::buffer_cast<const size_t*>(buf);
285  auto seqID = *size_t_ptr;
286  auto fragID = *(size_t_ptr + 1);
287  auto subfragID = *(size_t_ptr + 2);
288 
289  if (seqID != current_sequenceID || fragID != current_fragmentID)
290  {
291  // JCF, Jun-22-2016
292  // Code currently operates under the assumption that all subfragments from the call are from the same fragment
293 
294  assert(bytes_processed == 0);
295 
296  if (current_subfragments < expected_subfragments)
297  {
298  last_fragment_truncated = true;
299 
300  if (expected_subfragments != std::numeric_limits<size_t>::max())
301  {
302  std::cerr << "Warning: only received " << current_subfragments << " subfragments for fragment with seqID = " <<
303  current_sequenceID << ", fragID = " << current_fragmentID << " (expected " << expected_subfragments << ")\n"
304  << std::endl;
305  }
306  else
307  {
308  std::cerr << "Warning: only received " << current_subfragments <<
309  " subfragments for fragment with seqID = " <<
310  current_sequenceID << ", fragID = " << current_fragmentID <<
311  ", # of expected subfragments is unknown as fragment header was not received)\n"
312  << std::endl;
313  }
314  }
315 
316  current_subfragments = 0;
317  fragment_size = std::numeric_limits<size_t>::max();
318  expected_subfragments = std::numeric_limits<size_t>::max();
319  current_sequenceID = seqID;
320  current_fragmentID = fragID;
321  }
322 
323  auto ptr_into_fragment = fragment.headerBeginBytes() + subfragID * subfragment_size_;
324 
325  auto ptr_into_buffer = boost::asio::buffer_cast<const byte_t*>(buf) + sizeof(subfragment_identifier);
326 
327  std::copy(ptr_into_buffer, ptr_into_buffer + buf_size - sizeof(subfragment_identifier), ptr_into_fragment);
328 
329  if (subfragID == 0)
330  {
331  if (buf_size >= sizeof(subfragment_identifier) + sizeof(artdaq::detail::RawFragmentHeader))
332  {
333  auto payload_size = std::numeric_limits<size_t>::max();
334  get_fragment_quantities(buf, payload_size, fragment_size, expected_subfragments);
335 
336  fragment.resizeBytes(payload_size);
337  }
338  else
339  {
340  throw cet::exception("MulticastTransfer") << "Buffer size is too small to completely contain an artdaq::Fragment header; " <<
341  "please increase the default size";
342  }
343  }
344 
345  current_subfragments++;
346 
347  if (current_subfragments == expected_subfragments)
348  {
349  fragment_complete = true;
350  }
351 
352  bytes_processed += buf_size;
353 
354  if (bytes_processed >= bytes_received)
355  {
356  break;
357  }
358  }
359 
360  if (last_fragment_truncated)
361  {
362  // JCF, 7-7-2017
363 
364  // Don't yet have code to handle the scenario where the set of
365  // subfragments received in the last iteration of the loop was
366  // its own complete fragment, but we know the previous fragment
367  // to be incomplete
368 
369  assert(!fragment_complete);
370  TLOG(TLVL_WARNING) << GetTraceName() << ": Got an incomplete fragment" ;
372  }
373 
374  if (fragment_complete)
375  {
376  return source_rank();
377  }
378  }
379 
381 }
382 
383 #pragma GCC diagnostic pop
384 
385 int artdaq::MulticastTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout)
386 {
387  auto ret = receiveFragment(fragment_buffer_, receiveTimeout);
388  if (ret == source_rank())
389  {
390  header = *reinterpret_cast<detail::RawFragmentHeader*>(fragment_buffer_.headerAddress());
391  return source_rank();
392  }
393  return ret;
394 }
395 
396 int artdaq::MulticastTransfer::receiveFragmentData(RawDataType* destination, size_t wordCount)
397 {
398  if (fragment_buffer_.size() > detail::RawFragmentHeader::num_words()) {
399  auto dataSize = (fragment_buffer_.size() - detail::RawFragmentHeader::num_words()) * sizeof(RawDataType);
400  memcpy(destination, fragment_buffer_.headerAddress() + detail::RawFragmentHeader::num_words(), dataSize);
401  return source_rank();
402  }
403  return RECV_TIMEOUT;
404 }
405 
406 
407 // Reliable transport is undefined for multicast; just use copy
410 {
411  return copyFragment(f, 100000000);
412 }
413 
415 artdaq::MulticastTransfer::copyFragment(artdaq::Fragment& fragment,
416  size_t send_timeout_usec)
417 {
418  assert(TransferInterface::role() == Role::kSend);
419 
420  if (fragment.sizeBytes() > max_fragment_size_words_)
421  {
422  throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::copyFragmentTo: " <<
423  fragment.sizeBytes() << " byte fragment exceeds max_fragment_size of " << max_fragment_size_words_;
424  }
425 
426  static size_t ncalls = 1;
427  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
428 
429  ncalls++;
430 
431  fill_staging_memory(fragment);
432 
433  for (size_t batch_index = 0; ; batch_index++)
434  {
435  auto first_subfragment = batch_index * subfragments_per_send_;
436  auto last_subfragment = (batch_index + 1) * subfragments_per_send_ >= num_subfragments ?
437  num_subfragments - 1 :
438  (batch_index + 1) * subfragments_per_send_ - 1;
439 
440  std::vector<boost::asio::const_buffer> buffers;
441 
442  book_container_of_buffers(buffers, fragment.sizeBytes(), num_subfragments, first_subfragment, last_subfragment);
443 
444  socket_->send_to(buffers, *multicast_endpoint_);
445 
446  usleep(pause_on_copy_usecs_);
447 
448  if (last_subfragment == num_subfragments - 1)
449  {
450  break;
451  }
452  }
453  return CopyStatus::kSuccess;
454 }
455 
456 #pragma GCC diagnostic push
457 #pragma GCC diagnostic ignored "-Wsign-compare"
458 
459 void artdaq::MulticastTransfer::fill_staging_memory(const artdaq::Fragment& fragment)
460 {
461  auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_)));
462  TLOG(TLVL_DEBUG) << GetTraceName() << ": # of subfragments to use is " << num_subfragments ;
463 
464  for (auto i_s = 0; i_s < num_subfragments; ++i_s)
465  {
466  auto staging_memory_copyto = &staging_memory_.at(i_s * (sizeof(subfragment_identifier) + subfragment_size_));
467 
468  subfragment_identifier sfi(fragment.sequenceID(), fragment.fragmentID(), i_s);
469 
470  std::copy(reinterpret_cast<byte_t*>(&sfi),
471  reinterpret_cast<byte_t*>(&sfi) + sizeof(subfragment_identifier),
472  staging_memory_copyto);
473 
474  auto low_ptr_into_fragment = fragment.headerBeginBytes() + subfragment_size_ * i_s;
475 
476  auto high_ptr_into_fragment = (i_s == num_subfragments - 1) ?
477  fragment.dataEndBytes() :
478  fragment.headerBeginBytes() + subfragment_size_ * (i_s + 1);
479 
480  std::copy(low_ptr_into_fragment,
481  high_ptr_into_fragment,
482  staging_memory_copyto + sizeof(subfragment_identifier));
483  }
484 }
485 
486 #pragma GCC diagnostic pop
487 
488 // Note that book_container_of_buffers includes, rather than excludes,
489 // "last_subfragment_num"; in this regard it's different than the way
490 // STL functions receive iterators. Note also that the lowest possible
491 // value for "first_subfragment_num" is 0, not 1.
492 
493 template <typename T>
494 void artdaq::MulticastTransfer::book_container_of_buffers(std::vector<T>& buffers,
495  const size_t fragment_size,
496  const size_t total_subfragments,
497  const size_t first_subfragment_num,
498  const size_t last_subfragment_num)
499 {
500  assert(staging_memory_.size() >= total_subfragments * (sizeof(subfragment_identifier) + subfragment_size_));
501  assert(buffers.size() == 0);
502  assert(last_subfragment_num < total_subfragments);
503 
504  for (auto i_f = first_subfragment_num; i_f <= last_subfragment_num; ++i_f)
505  {
506  auto bytes_to_store = (i_f == total_subfragments - 1) ?
507  sizeof(subfragment_identifier) + (fragment_size - (total_subfragments - 1) * subfragment_size_) :
508  sizeof(subfragment_identifier) + subfragment_size_;
509 
510  buffers.emplace_back(&staging_memory_.at(i_f * (sizeof(subfragment_identifier) + subfragment_size_)),
511  bytes_to_store);
512  }
513 }
514 
515 
516 #pragma GCC diagnostic push // Needed since profile builds will ignore the assert
517 #pragma GCC diagnostic ignored "-Wunused-variable"
518 
519 void artdaq::MulticastTransfer::get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size,
520  size_t& fragment_size,
521  size_t& expected_subfragments)
522 {
523  byte_t* buffer_ptr = boost::asio::buffer_cast<byte_t*>(buf);
524 
525  auto subfragment_num = *(reinterpret_cast<size_t*>(buffer_ptr) + 2);
526 
527  assert(subfragment_num == 0);
528 
529  artdaq::detail::RawFragmentHeader* header =
530  reinterpret_cast<artdaq::detail::RawFragmentHeader*>(buffer_ptr + sizeof(subfragment_identifier));
531 
532  fragment_size = header->word_count * sizeof(artdaq::RawDataType);
533 
534  auto metadata_size = header->metadata_word_count * sizeof(artdaq::RawDataType);
535  payload_size = fragment_size - metadata_size - artdaq::detail::RawFragmentHeader::num_words() *
536  sizeof(artdaq::RawDataType);
537 
538  assert(fragment_size ==
539  artdaq::detail::RawFragmentHeader::num_words() * sizeof(artdaq::RawDataType) +
540  metadata_size +
541  payload_size);
542 
543  expected_subfragments = static_cast<size_t>(std::ceil(fragment_size / static_cast<float>(subfragment_size_)));
544 }
545 #pragma GCC diagnostic pop
546 
547 void artdaq::MulticastTransfer::set_receive_buffer_size(size_t recv_buff_size)
548 {
549  if (recv_buff_size == 0) return;
550  boost::asio::socket_base::receive_buffer_size actual_recv_buff_size;
551  socket_->get_option(actual_recv_buff_size);
552 
553  TLOG(TLVL_DEBUG) << GetTraceName() << ": Receive buffer size is currently " << actual_recv_buff_size.value() <<
554  " bytes, will try to change it to " << recv_buff_size ;
555 
556  boost::asio::socket_base::receive_buffer_size recv_buff_option(recv_buff_size);
557 
558  boost::system::error_code ec;
559  socket_->set_option(recv_buff_option, ec);
560 
561  if (ec != 0)
562  {
563  std::cerr << "boost::system::error_code with value " << ec <<
564  " was found in attempt to change receive buffer" << std::endl;
565  }
566 
567  socket_->get_option(actual_recv_buff_size);
568  TLOG(TLVL_DEBUG) << GetTraceName() << ": After attempted change, receive buffer size is now " << actual_recv_buff_size.value() ;
569 }
570 
571 #pragma GCC diagnostic pop
572 
573 DEFINE_ARTDAQ_TRANSFER(artdaq::MulticastTransfer)
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
int receiveFragment(artdaq::Fragment &fragment, size_t receiveTimeout) override
Receive a Fragment using Multicast.
Role role() const
Get the TransferInterface::Role of this TransferInterface.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
bool isRunning() override
Determine whether the TransferInterface plugin is able to send/receive data.
CopyStatus copyFragment(artdaq::Fragment &fragment, size_t send_timeout_usec) override
Copy a Fragment to the destination. Multicast is always unreliable.
MulticastTransfer is a TransferInterface implementation plugin that transfers data using Multicast...
This TransferInterface is a Receiver.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
This TransferInterface is a Sender.
virtual ~MulticastTransfer()=default
Default destructor.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
MulticastTransfer(fhicl::ParameterSet const &ps, Role role)
MulticastTransfer Constructor.
CopyStatus moveFragment(artdaq::Fragment &&fragment) override
Move a Fragment to the destination. Multicast is always unreliable.
This interface defines the functions used to transfer data between artdaq applications.
artdaq::Fragment::byte_t byte_t
Copy Fragment::byte_t into local scope.
Value to be returned upon receive timeout.
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
const size_t max_fragment_size_words_
The maximum size of the transferred Fragment objects, in artdaq::Fragment::RawDataType words...