1 #define TRACE_NAME "MPITransfer"
2 #include "artdaq/TransferPlugins/MPITransfer.hh"
5 #include "canvas/Utilities/Exception.h"
6 #include "cetlib_except/exception.h"
8 #include "artdaq-core/Data/Fragment.hh"
20 #define MPI_TAG_HEADER 0x8E // 142
21 #define MPI_TAG_DATA 0xDA // 218
24 std::mutex artdaq::MPITransfer::mpi_mutex_;
// Constructor fragment: the signature, the start of the initializer list and the
// braces are not visible in this listing.
// reqs_ is sized to two MPI_Request slots per buffer, each starting as
// MPI_REQUEST_NULL; moveFragment uses a slot pair for the header send and the
// data send of one buffer. payload_ is sized to one entry per buffer.
// The throw below reports "No buffers configured." as an art Configuration error.
// NOTE(review): the condition guarding the throw is on a line not shown here --
// presumably buffer_count_ == 0; confirm.
28 , reqs_(2 * buffer_count_, MPI_REQUEST_NULL)
29 , payload_(buffer_count_)
32 TLOG_TRACE(
"MPITransfer") <<
uniqueLabel() <<
" MPITransfer construction: "
39 throw art::Exception(art::errors::Configuration,
"MPITransfer: ")
40 <<
"No buffers configured.";
// Destructor fragment (identified by the trace text); signature and braces are
// not visible in this listing.
// Copies every entry of reqs_ that is not MPI_REQUEST_NULL into a local vector,
// then blocks in MPI_Waitall until all of them have completed, ignoring the
// individual statuses.
47 TLOG_TRACE(
"MPITransfer") << uniqueLabel() <<
" MPITransfer::~MPITransfer: BEGIN" << TLOG_ENDL;
48 TLOG_TRACE(
"MPITransfer") << uniqueLabel() <<
" MPITransfer::~MPITransfer: Collecting requests that need to be waited on" << TLOG_ENDL;
49 std::vector<MPI_Request> reqs;
50 for (
size_t ii = 0; ii < reqs_.size(); ++ii)
52 if (reqs_[ii] != MPI_REQUEST_NULL)
54 reqs.push_back(reqs_[ii]);
59 TLOG_TRACE(
"MPITransfer") << uniqueLabel() <<
"MPITransfer::~MPITransfer: Waiting on " << std::to_string(reqs.size()) <<
" reqs." << TLOG_ENDL;
// NOTE(review): &reqs[0] is undefined behaviour when reqs is empty. The lines
// between the collection loop and this call are not visible, so confirm that an
// emptiness guard exists (or pass reqs.data()).
// NOTE(review): no mpi_mutex_ lock is visible around this MPI call, unlike the
// other MPI calls in this file -- confirm whether that is intentional.
60 MPI_Waitall(reqs.size(), &reqs[0], MPI_STATUSES_IGNORE);
66 TLOG_TRACE(
"MPITransfer") << uniqueLabel() <<
" MPITransfer::~MPITransfer: DONE" << TLOG_ENDL;
// Forwards to moveFragment with `frag` cast to an rvalue, so on a successful send
// the caller's Fragment is moved from (moveFragment move-assigns it into payload_).
// NOTE(review): the enclosing signature is not visible here -- presumably
// copyFragment(Fragment&, size_t); confirm, and confirm callers expect `frag` to
// be consumed despite the "copy" name.
73 return moveFragment(std::move(frag), send_timeout_usec);
// moveFragment fragment: signature, braces and some statements are not visible in
// this listing.
// Visible flow: warn about an over-size Fragment, poll findAvailable() until a send
// slot is free or send_timeout_usec has elapsed, move the Fragment into payload_,
// then post two MPI_Issend calls (header, then data) while holding mpi_mutex_.
// kSuccess is returned right after the sends are posted; no wait for their
// completion is visible here.

// An over-size Fragment is only warned about in the visible lines, not rejected.
80 if (frag.dataSize() > max_fragment_size_words_)
82 TLOG_WARNING(
"MPITransfer") << uniqueLabel() <<
" Fragment has size (" << frag.dataSize() <<
") larger than max_fragment_size_words_ (" << max_fragment_size_words_ <<
")."
83 <<
" Total buffer space is: " << max_fragment_size_words_ * buffer_count_ <<
" words. Multiple over-size Fragments will exhaust the buffer!" << TLOG_ENDL;
86 auto start_time = std::chrono::steady_clock::now();
88 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment: Finding available send slot, send_timeout_usec=" << std::to_string(send_timeout_usec) << TLOG_ENDL;
89 auto req_idx = findAvailable();
// Keep polling while findAvailable() reports RECV_TIMEOUT (no free slot) and the
// elapsed time is still below send_timeout_usec.
91 while (req_idx == RECV_TIMEOUT && TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec)
94 req_idx = findAvailable();
// Progress is logged whenever `counter` is a multiple of 1000; `counter` is
// declared and updated on lines not shown here.
96 if (counter % 1000 == 0)
98 TLOG_INFO(
"MPITransfer") << uniqueLabel() <<
" Rank " << source_rank() <<
" waiting for available buffer to " << destination_rank() <<
". "
99 <<
"Waited " << std::to_string(TimeUtils::GetElapsedTimeMilliseconds(start_time)) <<
" ms so far." << TLOG_ENDL;
// NOTE(review): this message names sendFragment and RECV_TIMEOUT, but the code is
// in moveFragment and the value returned is CopyStatus::kTimeout. The condition
// guarding this path is on a line not shown here.
105 TLOG_WARNING(
"MPITransfer") << uniqueLabel() <<
" MPITransfer::sendFragment: No buffers available! Returning RECV_TIMEOUT!" << TLOG_ENDL;
106 return CopyStatus::kTimeout;
109 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment send slot is " << req_idx << TLOG_ENDL;
// Each buffer owns two consecutive request slots (reqs_ holds 2 * buffer_count_
// entries), so halving the slot index gives the payload_ index.
110 auto buffer_idx = req_idx / 2;
111 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment: Swapping in fragment to send to buffer " << buffer_idx << TLOG_ENDL;
// The Fragment is move-assigned into payload_ and both sends below read from that
// stored copy, so it has to remain untouched until the two requests complete.
112 Fragment& curfrag = payload_[buffer_idx];
113 curfrag = std::move(frag);
115 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment before send src=" << source_rank() <<
" dest=" << destination_rank() <<
" seqID=" << std::to_string(curfrag.sequenceID()) <<
" type=" << curfrag.typeString() <<
" found_idx=" << req_idx << TLOG_ENDL;
// mpi_mutex_ is a static member, so this lock is shared by all MPITransfer instances.
117 std::unique_lock<std::mutex> lk(mpi_mutex_);
// NOTE(review): the trace text says MPI_Isend, but the calls below are MPI_Issend.
120 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment: Using MPI_Isend" << TLOG_ENDL;
// Header send: RawFragmentHeader::num_words() words starting at headerAddress(),
// tagged MPI_TAG_HEADER and tracked in reqs_[req_idx]. The return code is not checked.
122 MPI_Issend(curfrag.headerAddress(), detail::RawFragmentHeader::num_words() *
sizeof(RawDataType), MPI_BYTE, destination_rank(), MPI_TAG_HEADER, MPI_COMM_WORLD, &reqs_[req_idx]);
// Data send: everything after the header words, tagged MPI_TAG_DATA and tracked in
// reqs_[req_idx + 1]. The return code is not checked.
124 auto sizeWrds = curfrag.size() - detail::RawFragmentHeader::num_words();
125 auto offset = curfrag.headerAddress() + detail::RawFragmentHeader::num_words();
126 MPI_Issend(offset, sizeWrds *
sizeof(RawDataType), MPI_BYTE, destination_rank(), MPI_TAG_DATA, MPI_COMM_WORLD, &reqs_[req_idx + 1]);
127 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment COMPLETE" << TLOG_ENDL;
129 TLOG_ARB(11,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::moveFragment COMPLETE: "
130 <<
" buffer_idx=" << buffer_idx
131 <<
" send_size=" << curfrag.size()
132 <<
" src=" << source_rank()
133 <<
" dest=" << destination_rank()
134 <<
" sequenceID=" << curfrag.sequenceID()
135 <<
" fragID=" << curfrag.fragmentID() << TLOG_ENDL;
136 return CopyStatus::kSuccess;
// receiveFragmentHeader fragment: signature, braces, loop structure and the switch
// header are not visible in this listing.
// Visible flow: post a non-blocking receive for one header from source_rank() with
// MPI_TAG_HEADER, check it with MPI_Test, throw if the request handle was not
// reset, log the outcome, and return status.MPI_SOURCE.
// The entry trace states that the timeout argument is ignored; no other use of
// timeout_usec is visible here.
141 TLOG_ARB(6,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::receiveFragmentHeader entered tmo=" << std::to_string(timeout_usec) <<
" us (ignored)" << TLOG_ENDL;
143 int wait_result = MPI_SUCCESS;
// Each MPI call below takes mpi_mutex_ separately; the scopes that bound the two
// locks are not visible in this listing.
147 std::unique_lock<std::mutex> lk(mpi_mutex_);
148 MPI_Irecv(&header, header.num_words() *
sizeof(RawDataType), MPI_BYTE, source_rank(), MPI_TAG_HEADER, MPI_COMM_WORLD, &req);
// NOTE(review): the code around MPI_Test (presumably a polling loop on `flag`) is
// not shown here -- confirm.
154 std::unique_lock<std::mutex> lk(mpi_mutex_);
155 wait_result = MPI_Test(&req, &flag, &status);
// A request handle that is still live at this point is treated as an internal logic error.
162 if (req != MPI_REQUEST_NULL)
// NOTE(review): the same message is logged twice (TLOG_ERROR and TLOG(TLVL_ERROR))
// before the throw; one of the two looks redundant.
164 TLOG_ERROR(
"MPITransfer") << uniqueLabel() <<
" INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentHeader." << TLOG_ENDL;
165 TLOG(TLVL_ERROR) << uniqueLabel() <<
" INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentHeader." << TLOG_ENDL;
166 throw art::Exception(art::errors::LogicError,
"MPITransfer: ") <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentHeader.";
169 TLOG_ARB(8,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::receiveFragmentHeader recvd" << TLOG_ENDL;
172 {TLOG_ARB(8,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::receiveFragmentHeader: " << my_rank
173 <<
" Wait_error=" << wait_result
174 <<
" status_error=" << status.MPI_ERROR
175 <<
" source=" << status.MPI_SOURCE
176 <<
" tag=" << status.MPI_TAG
177 <<
" Fragment_sequenceID=" << (uint64_t)header.sequence_id
178 <<
" Fragment_size=" << header.word_count
179 <<
" fragID=" << header.fragment_id << TLOG_ENDL;
181 char err_buffer[MPI_MAX_ERROR_STRING];
// NOTE(review): the error text below says "Waitany", but wait_result comes from
// MPI_Test above. The switch header and the other case labels are not visible.
187 case MPI_ERR_IN_STATUS:
188 MPI_Error_string(status.MPI_ERROR, err_buffer, &resultlen);
189 TLOG_ERROR(
"MPITransfer") << uniqueLabel()
190 <<
" MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
193 MPI_Error_string(wait_result, err_buffer, &resultlen);
194 TLOG_ERROR(
"MPITransfer") << uniqueLabel()
195 <<
" MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
// The value returned is the source rank recorded in the MPI status.
199 return status.MPI_SOURCE;
// receiveFragmentData fragment: signature, braces, loop structure and the switch
// header are not visible in this listing.
// Visible flow: post a non-blocking receive of wordCount * sizeof(RawDataType)
// bytes into `destination` from source_rank() with MPI_TAG_DATA, check it with
// MPI_Test, throw if the request handle was not reset, and return status.MPI_SOURCE.
204 TLOG_ARB(6,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::receiveFragmentData entered wordCount=" << std::to_string(wordCount) << TLOG_ENDL;
205 int wait_result = MPI_SUCCESS;
// Each MPI call below takes mpi_mutex_ separately; the scopes that bound the two
// locks are not visible in this listing.
210 std::unique_lock<std::mutex> lk(mpi_mutex_);
211 MPI_Irecv(destination, wordCount *
sizeof(RawDataType), MPI_BYTE, source_rank(), MPI_TAG_DATA, MPI_COMM_WORLD, &req);
// NOTE(review): the code around MPI_Test (presumably a polling loop on `flag`) is
// not shown here -- confirm.
217 std::unique_lock<std::mutex> lk(mpi_mutex_);
218 wait_result = MPI_Test(&req, &flag, &status);
// A request handle that is still live at this point is treated as an internal logic error.
224 if (req != MPI_REQUEST_NULL)
226 TLOG_ERROR(
"MPITransfer") << uniqueLabel() <<
" INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentData." << TLOG_ENDL;
227 throw art::Exception(art::errors::LogicError,
"MPITransfer: ") <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentData.";
231 TLOG_ARB(8,
"MPITransfer") << uniqueLabel() <<
" MPITransfer::receiveFragmentData recvd" << TLOG_ENDL;
234 char err_buffer[MPI_MAX_ERROR_STRING];
// NOTE(review): the error text below says "Waitany", but wait_result comes from
// MPI_Test above. The switch header and the other case labels are not visible.
240 case MPI_ERR_IN_STATUS:
241 MPI_Error_string(status.MPI_ERROR, err_buffer, &resultlen);
242 TLOG_ERROR(
"MPITransfer") << uniqueLabel()
243 <<
" MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
246 MPI_Error_string(wait_result, err_buffer, &resultlen);
247 TLOG_ERROR(
"MPITransfer") << uniqueLabel()
248 <<
" MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
// The value returned is the source rank recorded in the MPI status.
252 return status.MPI_SOURCE;
// cancelReq_ fragment: the return type, braces, switch header and some case labels
// are not visible in this listing.
// Returns immediately for MPI_REQUEST_NULL. Otherwise, holding mpi_mutex_, it calls
// MPI_Cancel and, when that succeeds, MPI_Wait on the request. The visible failure
// paths throw art LogicError for MPI_ERR_REQUEST, MPI_ERR_ARG and unknown codes.
// `req` is taken by value, so MPI_Cancel/MPI_Wait act on a local copy of the handle
// and the caller's variable is not updated.
// NOTE(review): confirm callers reset their own handle after calling this.
256 artdaq::MPITransfer::
257 cancelReq_(MPI_Request req)
const
259 if (req == MPI_REQUEST_NULL)
return;
261 TLOG_ARB(8,
"MPITransfer") << uniqueLabel() <<
" Cancelling post" << TLOG_ENDL;
263 std::unique_lock<std::mutex> lk(mpi_mutex_);
264 int result = MPI_Cancel(&req);
265 if (result == MPI_SUCCESS)
// `status` is declared on a line not shown here.
268 MPI_Wait(&req, &status);
274 case MPI_ERR_REQUEST:
275 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
276 <<
"MPI_Cancel returned MPI_ERR_REQUEST.\n";
278 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
279 <<
"MPI_Cancel returned MPI_ERR_ARG.\n";
281 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
282 <<
"MPI_Cancel returned unknown error code.\n";
// findAvailable fragment: the body's braces, the `do {` opener, the initialisation
// of use_me / flag / flag2 / loops and the return statement are not visible here.
// Visible flow: holding mpi_mutex_, test both request slots of the candidate pair
// (use_me and use_me + 1) with MPI_Test, advance pos_ by one pair with wrap-around
// at reqs_.size(), and repeat until flag2 is set or `loops` reaches buffer_count_.
// NOTE(review): moveFragment compares the result against RECV_TIMEOUT, so that is
// presumably what is returned when no pair is free -- confirm on the omitted lines.
287 int artdaq::MPITransfer::findAvailable()
296 std::unique_lock<std::mutex> lk(mpi_mutex_);
297 MPI_Test(&reqs_[use_me], &flag, MPI_STATUS_IGNORE);
299 MPI_Test(&reqs_[use_me + 1], &flag2, MPI_STATUS_IGNORE);
// Step to the next slot pair; two slots belong to each buffer.
301 pos_ = (pos_ + 2) % reqs_.size();
303 }
while (!flag2 && loops < buffer_count_);
305 TLOG_ARB(5,
"MPITransfer") << uniqueLabel() <<
" findAvailable returning use_me=" << use_me <<
" loops=" << std::to_string(loops) << TLOG_ENDL;
size_t buffer_count_
The number of Fragment transfers the TransferInterface can handle simultaneously. ...
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
virtual ~MPITransfer()
MPITransfer Destructor.
CopyStatus moveFragment(Fragment &&frag, size_t timeout_usec=std::numeric_limits< size_t >::max()) override
Move a Fragment to the destination.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
MPITransfer(fhicl::ParameterSet pset, Role role)
MPITransfer Constructor.
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
CopyStatus copyFragment(Fragment &frag, size_t timeout_usec=std::numeric_limits< size_t >::max()) override
Copy a Fragment to the destination. Forces asynchronous send.
std::string uniqueLabel() const
Get the unique label of this TransferInterface instance.
This interface defines the functions used to transfer data between artdaq applications.
virtual int destination_rank() const
Get the destination rank for this TransferInterface instance.
MPITransfer is a TransferInterface implementation plugin that transfers data using MPI...
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.