$treeview $search $mathjax $extrastylesheet
artdaq
v3_04_01
$projectbrief
|
$projectbrief
|
$searchbox |
00001 #define TRACE_NAME (app_name + "_MulticastTransfer").c_str() 00002 #include "artdaq/DAQdata/Globals.hh" 00003 00004 #include "artdaq/TransferPlugins/TransferInterface.hh" 00005 00006 #include "artdaq-core/Data/Fragment.hh" 00007 #include "artdaq-core/Utilities/ExceptionHandler.hh" 00008 00009 #include "fhiclcpp/ParameterSet.h" 00010 #include "cetlib_except/exception.h" 00011 00012 #include <boost/asio.hpp> 00013 #include <boost/bind.hpp> 00014 00015 #include <iostream> 00016 #include <vector> 00017 #include <cassert> 00018 #include <string> 00019 #include <type_traits> 00020 #include <bitset> 00021 00022 #pragma GCC diagnostic push 00023 #pragma GCC diagnostic ignored "-Wunused-parameter" 00024 00025 00026 namespace artdaq 00027 { 00031 class MulticastTransfer : public TransferInterface 00032 { 00033 public: 00034 00035 using byte_t = artdaq::Fragment::byte_t; 00036 00040 virtual ~MulticastTransfer() = default; 00041 00059 MulticastTransfer(fhicl::ParameterSet const& ps, Role role); 00060 00067 int receiveFragment(artdaq::Fragment& fragment, 00068 size_t receiveTimeout) override; 00069 00076 int receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) override; 00077 00084 int receiveFragmentData(RawDataType* destination, size_t wordCount) override; 00085 00092 CopyStatus transfer_fragment_min_blocking_mode(artdaq::Fragment const& fragment, size_t send_timeout_usec) override; 00093 00099 CopyStatus transfer_fragment_reliable_mode(artdaq::Fragment&& fragment) override; 00100 00105 bool isRunning() override { return socket_ != nullptr; } 00106 00111 void flush_buffers() override {} 00112 private: 00113 00114 void fill_staging_memory(const artdaq::Fragment& frag); 00115 00116 template <typename T> 00117 void book_container_of_buffers(std::vector<T>& buffers, 00118 const size_t fragment_size, 00119 const size_t total_subfragments, 00120 const size_t first_subfragment_num, 00121 const size_t last_subfragment_num); 00122 00123 void get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size, size_t& fragment_size, 00124 size_t& expected_subfragments); 00125 00126 void set_receive_buffer_size(size_t recv_buff_size); 00127 00128 class subfragment_identifier 00129 { 00130 public: 00131 00132 subfragment_identifier(size_t sequenceID, size_t fragmentID, size_t subfragment_number) : 00133 sequenceID_(sequenceID) 00134 , fragmentID_(fragmentID) 00135 , subfragment_number_(subfragment_number) { } 00136 00137 size_t sequenceID() const { return sequenceID_; } 00138 size_t fragmentID() const { return fragmentID_; } 00139 size_t subfragment_number() const { return subfragment_number_; } 00140 00141 private: 00142 size_t sequenceID_; 00143 size_t fragmentID_; 00144 size_t subfragment_number_; 00145 }; 00146 00147 std::unique_ptr<boost::asio::io_service> io_service_; 00148 00149 std::unique_ptr<boost::asio::ip::udp::endpoint> local_endpoint_; 00150 std::unique_ptr<boost::asio::ip::udp::endpoint> multicast_endpoint_; 00151 std::unique_ptr<boost::asio::ip::udp::endpoint> opposite_endpoint_; 00152 00153 std::unique_ptr<boost::asio::ip::udp::socket> socket_; 00154 00155 size_t subfragment_size_; 00156 size_t subfragments_per_send_; 00157 00158 size_t pause_on_copy_usecs_; 00159 Fragment fragment_buffer_; 00160 00161 std::vector<byte_t> staging_memory_; 00162 00163 std::vector<boost::asio::mutable_buffer> receive_buffers_; 00164 }; 00165 } 00166 00167 artdaq::MulticastTransfer::MulticastTransfer(fhicl::ParameterSet const& pset, Role role) : 00168 TransferInterface(pset, role) 00169 , io_service_(std::make_unique<std::remove_reference<decltype(*io_service_)>::type>()) 00170 , local_endpoint_(nullptr) 00171 , multicast_endpoint_(nullptr) 00172 , opposite_endpoint_(std::make_unique<std::remove_reference<decltype(*opposite_endpoint_)>::type>()) 00173 , socket_(nullptr) 00174 , subfragment_size_(pset.get<size_t>("subfragment_size")) 00175 , subfragments_per_send_(pset.get<size_t>("subfragments_per_send")) 00176 , pause_on_copy_usecs_(pset.get<size_t>("pause_on_copy_usecs", 0)) 00177 { 00178 try 00179 { 00180 portMan->UpdateConfiguration(pset); 00181 auto port = portMan->GetMulticastTransferPort(source_rank()); 00182 auto multicast_address = boost::asio::ip::address::from_string(portMan->GetMulticastTransferGroupAddress()); 00183 auto local_address = boost::asio::ip::address::from_string(pset.get<std::string>("local_address")); 00184 00185 TLOG(TLVL_DEBUG) << GetTraceName() << ": multicast address is set to " << multicast_address ; 00186 TLOG(TLVL_DEBUG) << GetTraceName() << ": local address is set to " << local_address ; 00187 00188 if (TransferInterface::role() == Role::kSend) 00189 { 00190 local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, 0); 00191 multicast_endpoint_ = std::make_unique<std::remove_reference<decltype(*multicast_endpoint_)>::type>(multicast_address, port); 00192 00193 socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_, 00194 multicast_endpoint_->protocol()); 00195 socket_->bind(*local_endpoint_); 00196 } 00197 else 00198 { // TransferInterface::role() == Role::kReceive 00199 00200 // Create the socket so that multiple may be bound to the same address. 00201 00202 local_endpoint_ = std::make_unique<std::remove_reference<decltype(*local_endpoint_)>::type>(local_address, port); 00203 socket_ = std::make_unique<std::remove_reference<decltype(*socket_)>::type>(*io_service_, 00204 local_endpoint_->protocol()); 00205 00206 boost::system::error_code ec; 00207 00208 socket_->set_option(boost::asio::ip::udp::socket::reuse_address(true), ec); 00209 00210 if (ec != 0) 00211 { 00212 std::cerr << "boost::system::error_code with value " << ec << " was found in setting reuse_address option" << std::endl; 00213 } 00214 00215 set_receive_buffer_size(pset.get<size_t>("receive_buffer_size", 0)); 00216 00217 socket_->bind(boost::asio::ip::udp::endpoint(multicast_address, port)); 00218 00219 // Join the multicast group. 00220 00221 socket_->set_option(boost::asio::ip::multicast::join_group(multicast_address), ec); 00222 00223 if (ec != 0) 00224 { 00225 std::cerr << "boost::system::error_code with value " << ec << " was found in attempt to join multicast group" << std::endl; 00226 } 00227 } 00228 } 00229 catch (...) 00230 { 00231 ExceptionHandler(ExceptionHandlerRethrow::yes, "Problem setting up the socket in MulticastTransfer"); 00232 } 00233 00234 auto max_subfragments = 00235 static_cast<size_t>(std::ceil(max_fragment_size_words_ / static_cast<float>(subfragment_size_))); 00236 00237 staging_memory_.resize(max_subfragments * (sizeof(subfragment_identifier) + subfragment_size_)); 00238 00239 if (TransferInterface::role() == Role::kReceive) 00240 { 00241 book_container_of_buffers(receive_buffers_, max_fragment_size_words_, max_subfragments, 0, max_subfragments - 1); 00242 } 00243 00244 TLOG(TLVL_DEBUG) << GetTraceName() << ": max_subfragments is " << max_subfragments ; 00245 TLOG(TLVL_DEBUG) << GetTraceName() << ": Staging buffer size is " << staging_memory_.size() ; 00246 } 00247 00248 #pragma GCC diagnostic push 00249 #pragma GCC diagnostic ignored "-Wunused-variable" 00250 00251 int artdaq::MulticastTransfer::receiveFragment(artdaq::Fragment& fragment, 00252 size_t receiveTimeout) 00253 { 00254 assert(TransferInterface::role() == Role::kReceive); 00255 00256 if (fragment.dataSizeBytes() > 0) 00257 { 00258 throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::receiveFragmentFrom: " << 00259 "nonzero payload found in fragment passed as argument"; 00260 } 00261 00262 static bool print_warning = true; 00263 00264 if (print_warning) 00265 { 00266 std::cerr << "Please note that MulticastTransfer::receiveFragmentFrom does not use its receiveTimeout argument" << std::endl; 00267 print_warning = false; 00268 } 00269 00270 fragment.resizeBytes(max_fragment_size_words_ - sizeof(artdaq::detail::RawFragmentHeader)); 00271 00272 static auto current_sequenceID = std::numeric_limits<Fragment::sequence_id_t>::max(); 00273 static auto current_fragmentID = std::numeric_limits<Fragment::fragment_id_t>::max(); 00274 00275 size_t fragment_size = 0; 00276 size_t expected_subfragments = 0; 00277 size_t current_subfragments = 0; 00278 bool fragment_complete = false; 00279 bool last_fragment_truncated = false; 00280 00281 while (true) 00282 { 00283 auto bytes_received = socket_->receive_from(receive_buffers_, *opposite_endpoint_); 00284 00285 size_t bytes_processed = 0; 00286 00287 for (auto& buf : receive_buffers_) 00288 { 00289 auto buf_size = boost::asio::buffer_size(buf); 00290 auto size_t_ptr = boost::asio::buffer_cast<const size_t*>(buf); 00291 auto seqID = *size_t_ptr; 00292 auto fragID = *(size_t_ptr + 1); 00293 auto subfragID = *(size_t_ptr + 2); 00294 00295 if (seqID != current_sequenceID || fragID != current_fragmentID) 00296 { 00297 // JCF, Jun-22-2016 00298 // Code currently operates under the assumption that all subfragments from the call are from the same fragment 00299 00300 assert(bytes_processed == 0); 00301 00302 if (current_subfragments < expected_subfragments) 00303 { 00304 last_fragment_truncated = true; 00305 00306 if (expected_subfragments != std::numeric_limits<size_t>::max()) 00307 { 00308 std::cerr << "Warning: only received " << current_subfragments << " subfragments for fragment with seqID = " << 00309 current_sequenceID << ", fragID = " << current_fragmentID << " (expected " << expected_subfragments << ")\n" 00310 << std::endl; 00311 } 00312 else 00313 { 00314 std::cerr << "Warning: only received " << current_subfragments << 00315 " subfragments for fragment with seqID = " << 00316 current_sequenceID << ", fragID = " << current_fragmentID << 00317 ", # of expected subfragments is unknown as fragment header was not received)\n" 00318 << std::endl; 00319 } 00320 } 00321 00322 current_subfragments = 0; 00323 fragment_size = std::numeric_limits<size_t>::max(); 00324 expected_subfragments = std::numeric_limits<size_t>::max(); 00325 current_sequenceID = seqID; 00326 current_fragmentID = fragID; 00327 } 00328 00329 auto ptr_into_fragment = fragment.headerBeginBytes() + subfragID * subfragment_size_; 00330 00331 auto ptr_into_buffer = boost::asio::buffer_cast<const byte_t*>(buf) + sizeof(subfragment_identifier); 00332 00333 std::copy(ptr_into_buffer, ptr_into_buffer + buf_size - sizeof(subfragment_identifier), ptr_into_fragment); 00334 00335 if (subfragID == 0) 00336 { 00337 if (buf_size >= sizeof(subfragment_identifier) + sizeof(artdaq::detail::RawFragmentHeader)) 00338 { 00339 auto payload_size = std::numeric_limits<size_t>::max(); 00340 get_fragment_quantities(buf, payload_size, fragment_size, expected_subfragments); 00341 00342 fragment.resizeBytes(payload_size); 00343 } 00344 else 00345 { 00346 throw cet::exception("MulticastTransfer") << "Buffer size is too small to completely contain an artdaq::Fragment header; " << 00347 "please increase the default size"; 00348 } 00349 } 00350 00351 current_subfragments++; 00352 00353 if (current_subfragments == expected_subfragments) 00354 { 00355 fragment_complete = true; 00356 } 00357 00358 bytes_processed += buf_size; 00359 00360 if (bytes_processed >= bytes_received) 00361 { 00362 break; 00363 } 00364 } 00365 00366 if (last_fragment_truncated) 00367 { 00368 // JCF, 7-7-2017 00369 00370 // Don't yet have code to handle the scenario where the set of 00371 // subfragments received in the last iteration of the loop was 00372 // its own complete fragment, but we know the previous fragment 00373 // to be incomplete 00374 00375 assert(!fragment_complete); 00376 TLOG(TLVL_WARNING) << GetTraceName() << ": Got an incomplete fragment" ; 00377 return artdaq::TransferInterface::RECV_TIMEOUT; 00378 } 00379 00380 if (fragment_complete) 00381 { 00382 return source_rank(); 00383 } 00384 } 00385 00386 return TransferInterface::RECV_TIMEOUT; 00387 } 00388 00389 #pragma GCC diagnostic pop 00390 00391 int artdaq::MulticastTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) 00392 { 00393 auto ret = receiveFragment(fragment_buffer_, receiveTimeout); 00394 if (ret == source_rank()) 00395 { 00396 header = *reinterpret_cast<detail::RawFragmentHeader*>(fragment_buffer_.headerAddress()); 00397 return source_rank(); 00398 } 00399 return ret; 00400 } 00401 00402 int artdaq::MulticastTransfer::receiveFragmentData(RawDataType* destination, size_t wordCount) 00403 { 00404 if (fragment_buffer_.size() > detail::RawFragmentHeader::num_words()) { 00405 auto dataSize = (fragment_buffer_.size() - detail::RawFragmentHeader::num_words()) * sizeof(RawDataType); 00406 memcpy(destination, fragment_buffer_.headerAddress() + detail::RawFragmentHeader::num_words(), dataSize); 00407 return source_rank(); 00408 } 00409 return RECV_TIMEOUT; 00410 } 00411 00412 00413 // Reliable transport is undefined for multicast; just use copy 00414 artdaq::TransferInterface::CopyStatus 00415 artdaq::MulticastTransfer::transfer_fragment_reliable_mode(artdaq::Fragment&& f) 00416 { 00417 return transfer_fragment_min_blocking_mode(f, 100000000); 00418 } 00419 00420 artdaq::TransferInterface::CopyStatus 00421 artdaq::MulticastTransfer::transfer_fragment_min_blocking_mode(artdaq::Fragment const& fragment, 00422 size_t send_timeout_usec) 00423 { 00424 assert(TransferInterface::role() == Role::kSend); 00425 00426 if (fragment.sizeBytes() > max_fragment_size_words_) 00427 { 00428 throw cet::exception("MulticastTransfer") << "Error in MulticastTransfer::copyFragmentTo: " << 00429 fragment.sizeBytes() << " byte fragment exceeds max_fragment_size of " << max_fragment_size_words_; 00430 } 00431 00432 static size_t ncalls = 1; 00433 auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_))); 00434 00435 ncalls++; 00436 00437 fill_staging_memory(fragment); 00438 00439 for (size_t batch_index = 0; ; batch_index++) 00440 { 00441 auto first_subfragment = batch_index * subfragments_per_send_; 00442 auto last_subfragment = (batch_index + 1) * subfragments_per_send_ >= num_subfragments ? 00443 num_subfragments - 1 : 00444 (batch_index + 1) * subfragments_per_send_ - 1; 00445 00446 std::vector<boost::asio::const_buffer> buffers; 00447 00448 book_container_of_buffers(buffers, fragment.sizeBytes(), num_subfragments, first_subfragment, last_subfragment); 00449 00450 socket_->send_to(buffers, *multicast_endpoint_); 00451 00452 usleep(pause_on_copy_usecs_); 00453 00454 if (last_subfragment == num_subfragments - 1) 00455 { 00456 break; 00457 } 00458 } 00459 return CopyStatus::kSuccess; 00460 } 00461 00462 #pragma GCC diagnostic push 00463 #pragma GCC diagnostic ignored "-Wsign-compare" 00464 00465 void artdaq::MulticastTransfer::fill_staging_memory(const artdaq::Fragment& fragment) 00466 { 00467 auto num_subfragments = static_cast<size_t>(std::ceil(fragment.sizeBytes() / static_cast<float>(subfragment_size_))); 00468 TLOG(TLVL_DEBUG) << GetTraceName() << ": # of subfragments to use is " << num_subfragments ; 00469 00470 for (auto i_s = 0; i_s < num_subfragments; ++i_s) 00471 { 00472 auto staging_memory_copyto = &staging_memory_.at(i_s * (sizeof(subfragment_identifier) + subfragment_size_)); 00473 00474 subfragment_identifier sfi(fragment.sequenceID(), fragment.fragmentID(), i_s); 00475 00476 std::copy(reinterpret_cast<byte_t*>(&sfi), 00477 reinterpret_cast<byte_t*>(&sfi) + sizeof(subfragment_identifier), 00478 staging_memory_copyto); 00479 00480 auto low_ptr_into_fragment = fragment.headerBeginBytes() + subfragment_size_ * i_s; 00481 00482 auto high_ptr_into_fragment = (i_s == num_subfragments - 1) ? 00483 fragment.dataEndBytes() : 00484 fragment.headerBeginBytes() + subfragment_size_ * (i_s + 1); 00485 00486 std::copy(low_ptr_into_fragment, 00487 high_ptr_into_fragment, 00488 staging_memory_copyto + sizeof(subfragment_identifier)); 00489 } 00490 } 00491 00492 #pragma GCC diagnostic pop 00493 00494 // Note that book_container_of_buffers includes, rather than excludes, 00495 // "last_subfragment_num"; in this regard it's different than the way 00496 // STL functions receive iterators. Note also that the lowest possible 00497 // value for "first_subfragment_num" is 0, not 1. 00498 00499 template <typename T> 00500 void artdaq::MulticastTransfer::book_container_of_buffers(std::vector<T>& buffers, 00501 const size_t fragment_size, 00502 const size_t total_subfragments, 00503 const size_t first_subfragment_num, 00504 const size_t last_subfragment_num) 00505 { 00506 assert(staging_memory_.size() >= total_subfragments * (sizeof(subfragment_identifier) + subfragment_size_)); 00507 assert(buffers.size() == 0); 00508 assert(last_subfragment_num < total_subfragments); 00509 00510 for (auto i_f = first_subfragment_num; i_f <= last_subfragment_num; ++i_f) 00511 { 00512 auto bytes_to_store = (i_f == total_subfragments - 1) ? 00513 sizeof(subfragment_identifier) + (fragment_size - (total_subfragments - 1) * subfragment_size_) : 00514 sizeof(subfragment_identifier) + subfragment_size_; 00515 00516 buffers.emplace_back(&staging_memory_.at(i_f * (sizeof(subfragment_identifier) + subfragment_size_)), 00517 bytes_to_store); 00518 } 00519 } 00520 00521 00522 #pragma GCC diagnostic push // Needed since profile builds will ignore the assert 00523 #pragma GCC diagnostic ignored "-Wunused-variable" 00524 00525 void artdaq::MulticastTransfer::get_fragment_quantities(const boost::asio::mutable_buffer& buf, size_t& payload_size, 00526 size_t& fragment_size, 00527 size_t& expected_subfragments) 00528 { 00529 byte_t* buffer_ptr = boost::asio::buffer_cast<byte_t*>(buf); 00530 00531 auto subfragment_num = *(reinterpret_cast<size_t*>(buffer_ptr) + 2); 00532 00533 assert(subfragment_num == 0); 00534 00535 artdaq::detail::RawFragmentHeader* header = 00536 reinterpret_cast<artdaq::detail::RawFragmentHeader*>(buffer_ptr + sizeof(subfragment_identifier)); 00537 00538 fragment_size = header->word_count * sizeof(artdaq::RawDataType); 00539 00540 auto metadata_size = header->metadata_word_count * sizeof(artdaq::RawDataType); 00541 payload_size = fragment_size - metadata_size - artdaq::detail::RawFragmentHeader::num_words() * 00542 sizeof(artdaq::RawDataType); 00543 00544 assert(fragment_size == 00545 artdaq::detail::RawFragmentHeader::num_words() * sizeof(artdaq::RawDataType) + 00546 metadata_size + 00547 payload_size); 00548 00549 expected_subfragments = static_cast<size_t>(std::ceil(fragment_size / static_cast<float>(subfragment_size_))); 00550 } 00551 #pragma GCC diagnostic pop 00552 00553 void artdaq::MulticastTransfer::set_receive_buffer_size(size_t recv_buff_size) 00554 { 00555 if (recv_buff_size == 0) return; 00556 boost::asio::socket_base::receive_buffer_size actual_recv_buff_size; 00557 socket_->get_option(actual_recv_buff_size); 00558 00559 TLOG(TLVL_DEBUG) << GetTraceName() << ": Receive buffer size is currently " << actual_recv_buff_size.value() << 00560 " bytes, will try to change it to " << recv_buff_size ; 00561 00562 boost::asio::socket_base::receive_buffer_size recv_buff_option(recv_buff_size); 00563 00564 boost::system::error_code ec; 00565 socket_->set_option(recv_buff_option, ec); 00566 00567 if (ec != 0) 00568 { 00569 std::cerr << "boost::system::error_code with value " << ec << 00570 " was found in attempt to change receive buffer" << std::endl; 00571 } 00572 00573 socket_->get_option(actual_recv_buff_size); 00574 TLOG(TLVL_DEBUG) << GetTraceName() << ": After attempted change, receive buffer size is now " << actual_recv_buff_size.value() ; 00575 } 00576 00577 #pragma GCC diagnostic pop 00578 00579 DEFINE_ARTDAQ_TRANSFER(artdaq::MulticastTransfer)