1 #include "artdaq/TransferPlugins/MPITransfer.hh"
4 #include "canvas/Utilities/Exception.h"
5 #include "cetlib/exception.h"
7 #include "artdaq-core/Data/Fragment.hh"
// Out-of-class definition of the static mutex shared by every MPITransfer
// instance. The visible code in this file takes it (std::unique_lock) right
// before MPI calls such as MPI_Testany, MPI_Waitany, MPI_Cancel, MPI_Irecv,
// MPI_Test and MPI_Isend/MPI_Ssend.
19 std::mutex artdaq::MPITransfer::mpi_mutex_;
// MPITransfer constructor (fragment: the signature and several lines are not
// part of this listing). Visible parts: member initializers, a construction
// trace message, and a configuration error for the no-buffer case.
//
// The source status starts out as SENDING.
23 , src_status_(status_t::SENDING)
// payload_ is constructed from buffer_count_; it is indexed per buffer
// elsewhere in this file.
26 , payload_(buffer_count_)
// Read from the "synchronous_sends" key of pset, with `true` passed as the
// second (fallback) argument of get<bool>.
27 , synchronous_sends_(pset.get<bool>(
"synchronous_sends", true))
// One request slot per buffer, every slot initially MPI_REQUEST_NULL.
28 , reqs_(buffer_count_, MPI_REQUEST_NULL)
// Build the construction message in a stream, then hand it to TRACE.
32 std::ostringstream debugstream;
33 debugstream <<
"MPITransfer construction: "
37 TRACE(TLVL_TRACE, debugstream.str().c_str());
// Configuration error; the condition guarding this throw is not shown here
// (presumably buffer_count_ == 0 -- confirm against the full file).
42 throw art::Exception(art::errors::Configuration,
"MPITransfer: ")
43 <<
"No buffers configured.\n";
// MPITransfer destructor (fragment: signature, braces and some statements are
// not part of this listing). Visible sequence: trace BEGIN, walk all buffers
// to cancel their requests, gather the requests that are still non-null, wait
// on them with MPI_Waitall, trace DONE.
60 TRACE(TLVL_TRACE,
"MPITransfer::~MPITransfer: BEGIN");
64 TRACE(TLVL_TRACE,
"MPITransfer::~MPITransfer: Cancelling all reqs");
// Loop body is not shown; presumably it calls cancelReq_ for buffer i --
// confirm against the full file.
65 for (
size_t i = 0; i < buffer_count_; ++i)
70 TRACE(TLVL_TRACE,
"MPITransfer::~MPITransfer: Collecting requests that need to be waited on");
// Local copy of every request handle that is still not MPI_REQUEST_NULL.
71 std::vector<MPI_Request> reqs;
72 for (
size_t ii = 0; ii < reqs_.size(); ++ii)
74 if (reqs_[ii] != MPI_REQUEST_NULL)
76 reqs.push_back(reqs_[ii]);
81 TRACE(TLVL_TRACE,
"MPITransfer::~MPITransfer: Waiting on %zu reqs.", reqs.size());
// NOTE(review): `&reqs[0]` is undefined behaviour when `reqs` is empty. A
// guard may exist on a line not shown in this listing -- confirm; if there is
// none, prefer reqs.data() behind an emptiness check.
// NOTE(review): MPI_Waitall operates on the local copies, so the handles in
// reqs_ are not reset by this call.
82 MPI_Waitall(reqs.size(), &reqs[0], MPI_STATUSES_IGNORE);
87 TRACE(TLVL_TRACE,
"MPITransfer::~MPITransfer: DONE");
// receiveFragment (fragment: the signature and many lines are not part of
// this listing). From the visible lines, the function
//   1. waits for one of the posted requests in reqs_ to complete, polling
//      when timeout_usec > 0 and blocking in MPI_Waitany otherwise,
//   2. validates the completed index and logs details / MPI errors,
//   3. auto-resizes the received Fragment, swaps it into `output`, and puts a
//      fresh Fragment of max_fragment_size_words_ words into the buffer slot,
//   4. updates end-of-data bookkeeping (src_status_, expected_count_),
//   5. returns status.MPI_SOURCE on the path shown at the end.
// Two polling variants appear below (MPI_Testsome with cached results, and
// MPI_Testany); how one of them is selected is not visible in this listing.
94 TRACE(6,
"MPITransfer::receiveFragment entered tmo=%lu us", timeout_usec);
// Timed path: poll rather than block.
100 if (timeout_usec > 0)
// No cached completions left: size the scratch arrays for MPI_Testsome.
103 if (ready_indices_.size() == 0) {
104 ready_indices_.resize(buffer_count_, -1);
105 ready_statuses_.resize(buffer_count_);
// NOTE(review): no mpi_mutex_ lock is visible around the MPI_Testsome calls,
// unlike the MPI_Testany/MPI_Waitany calls further down. The lock may sit on
// a line not shown -- confirm.
108 wait_result = MPI_Testsome(buffer_count_, &(reqs_[0]), &readyCount, &(ready_indices_[0]), &(ready_statuses_[0]));
// Something completed right away: remember the return code and trim the
// caches to the number of completions reported.
109 if (readyCount > 0) {
110 saved_wait_result_ = wait_result;
111 ready_indices_.resize(readyCount);
112 ready_statuses_.resize(readyCount);
// Polling schedule: start from 10 slices of timeout_usec. When a slice is
// longer than 250 the loop count is recomputed; the statement in between is
// not shown (presumably it caps sleep_time -- confirm).
115 size_t sleep_loops = 10;
116 size_t sleep_time = timeout_usec / sleep_loops;
117 if (sleep_time > 250) {
119 sleep_loops = timeout_usec / sleep_time;
// Re-test up to sleep_loops times and stop at the first completion. Any sleep
// between tests is on lines not shown.
121 for (
size_t idx = 0; idx < sleep_loops; ++idx) {
123 wait_result = MPI_Testsome(buffer_count_, &reqs_[0], &readyCount,
124 &ready_indices_[0], &ready_statuses_[0]);
125 if (readyCount > 0) {
break; }
127 if (readyCount > 0) {
128 saved_wait_result_ = wait_result;
129 ready_indices_.resize(readyCount);
130 ready_statuses_.resize(readyCount);
// Caches are emptied here; the enclosing condition is not shown (presumably
// the nothing-completed case).
133 ready_indices_.clear();
134 ready_statuses_.clear();
// Consume the oldest cached completion: take the front index/status pair and
// erase it from both vectors.
138 if (ready_indices_.size() > 0) {
139 wait_result = saved_wait_result_;
140 which = ready_indices_.front();
141 status = ready_statuses_.front();
142 ready_indices_.erase(ready_indices_.begin());
143 ready_statuses_.erase(ready_statuses_.begin());
// MPI_Testany variant: a single test taken under the class-wide MPI mutex.
152 std::unique_lock<std::mutex> lk(mpi_mutex_);
153 wait_result = MPI_Testany(buffer_count_, &reqs_[0], &which, &flag, &status);
// Same polling schedule as in the MPI_Testsome variant above.
157 size_t sleep_loops = 10;
158 size_t sleep_time = timeout_usec / sleep_loops;
159 if (sleep_time > 250)
162 sleep_loops = timeout_usec / sleep_time;
164 for (
size_t idx = 0; idx < sleep_loops; ++idx)
169 std::unique_lock<std::mutex> lk(mpi_mutex_);
170 wait_result = MPI_Testany(buffer_count_, &reqs_[0], &which, &flag, &status);
// Leave the polling loop once a completion is flagged or a non-negative index
// is reported.
172 if (flag || which >= 0) {
break; }
// Blocking wait for any request, taken under the MPI mutex (presumably the
// timeout_usec == 0 path; the branch structure is not shown).
184 std::unique_lock<std::mutex> lk(mpi_mutex_);
185 wait_result = MPI_Waitany(buffer_count_, &reqs_[0], &which, &status);
189 TRACE(8,
"recvFragment recvd");
// No usable index came back: treated as a logic error.
// NOTE(review): the message text reads "as on index value"; likely meant
// "as an index value".
191 if (which == MPI_UNDEFINED)
193 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
194 <<
"MPI_UNDEFINED returned as on index value from Waitany.\n";
// The request slot of the completed buffer is expected to be
// MPI_REQUEST_NULL at this point; anything else is an internal error.
196 if (reqs_[which] != MPI_REQUEST_NULL)
198 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
199 <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in recvFragment.\n";
// Sequence ID is read from the buffer before autoResize()/swap below.
201 Fragment::sequence_id_t sequence_id = payload_[which].sequenceID();
// Diagnostic dump of the wait result, MPI status fields and Fragment header
// values; dataSize() is logged before autoResize() is applied.
204 std::ostringstream debugstream;
205 debugstream <<
"recv: " << my_rank
207 <<
" Waitany_error=" << wait_result
208 <<
" status_error=" << status.MPI_ERROR
209 <<
" source=" << status.MPI_SOURCE
210 <<
" tag=" << status.MPI_TAG
211 <<
" Fragment_sequenceID=" << sequence_id
212 <<
" Fragment_size=" << payload_[which].size()
213 <<
" preAutoResize_Fragment_dataSize=" << payload_[which].dataSize()
214 <<
" fragID=" << payload_[which].fragmentID()
217 TRACE(4, debugstream.str().c_str());
// Error reporting (the switch header is not shown): for MPI_ERR_IN_STATUS the
// text comes from status.MPI_ERROR; the other visible branch formats
// wait_result itself.
219 char err_buffer[MPI_MAX_ERROR_STRING];
225 case MPI_ERR_IN_STATUS:
226 MPI_Error_string(status.MPI_ERROR, err_buffer, &resultlen);
227 TLOG_ERROR(uniqueLabel())
228 <<
"MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
231 MPI_Error_string(wait_result, err_buffer, &resultlen);
232 TLOG_ERROR(uniqueLabel())
233 <<
"MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
237 TRACE(7,
"recvFragment before autoResize/swap");
// autoResize() the received Fragment, then hand it to the caller by swapping
// it with `output`.
239 payload_[which].autoResize();
240 output.swap(payload_[which]);
241 TRACE(7,
"recvFragment after autoResize/swap seqID=%lu. "
242 "Reset our buffer. max=%zu adr=%p"
243 , output.sequenceID(), max_fragment_size_words_, (
void*)output.headerAddress());
// Replace the buffer slot's contents with a fresh Fragment constructed from
// max_fragment_size_words_.
245 Fragment tmp(max_fragment_size_words_);
246 TRACE(7,
"recvFragment before payload_[which].swap(tmp) adr=%p", (
void*)tmp.headerAddress());
247 payload_[which].swap(tmp);
248 TRACE(7,
"recvFragment after payload_[which].swap(tmp)");
// End-of-data marker: switch to PENDING and take the expected total from the
// first data word of the received Fragment.
250 if (output.type() == Fragment::EndOfDataFragmentType)
252 src_status_ = status_t::PENDING;
253 expected_count_ = *output.dataBegin();
256 std::ostringstream debugstream;
257 debugstream <<
"Received EOD from source " << status.MPI_SOURCE
258 <<
" expecting total of "
259 << *output.dataBegin() <<
" fragments" <<
'\n';
261 TRACE(4, debugstream.str().c_str());
// PENDING: compare the received count with the expected total. Equality moves
// the status to DONE; the "extra fragments" throw sits in a branch whose
// condition is not shown.
270 case status_t::PENDING:
272 std::ostringstream debugstream;
273 debugstream <<
"Checking received count "
275 <<
" against expected total "
279 TRACE(4, debugstream.str().c_str());
281 if (recvd_count_ == expected_count_)
283 src_status_ = status_t::DONE;
287 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
288 <<
"Received extra fragments from source "
// SENDING case (body not shown), followed by a throw for an unrecognized
// status value (presumably the default label -- not shown).
291 case status_t::SENDING:
294 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
295 <<
"INTERNAL ERROR: Unrecognized status_t value "
296 <<
static_cast<int>(src_status_)
// After reaching DONE, nextSource_() is compared against MPI_ANY_SOURCE; the
// guarded statements are not shown.
300 if (src_status_ == status_t::DONE)
302 if (nextSource_() != MPI_ANY_SOURCE)
// Result on this path: the rank the message was received from.
312 return status.MPI_SOURCE;
// Member function fragment; the return type and name lines are not part of
// this listing (presumably nextSource_, which receiveFragment calls -- confirm).
// Visible logic: yield source_rank() while the source is not DONE, otherwise
// MPI_ANY_SOURCE.
316 artdaq::MPITransfer::
321 if (src_status_ != status_t::DONE)
323 return source_rank();
325 return MPI_ANY_SOURCE;
// cancelReq_ (fragment: the return type, braces and several conditions are not
// part of this listing). Cancels the outstanding MPI request of buffer `buf`.
//   buf            index into reqs_ of the request to cancel
//   blocking_wait  presumably selects MPI_Wait versus the polled MPI_Test
//                  sequence below; the selecting condition is not shown
// Throws art::Exception(LogicError) for the MPI_Cancel failures listed at the
// end of the block.
329 artdaq::MPITransfer::
330 cancelReq_(
size_t buf,
bool blocking_wait)
// Nothing is posted on this buffer: nothing to cancel.
332 if (reqs_[buf] == MPI_REQUEST_NULL)
return;
335 std::ostringstream debugstream;
336 debugstream <<
"Cancelling post for buffer "
339 TRACE(4, debugstream.str().c_str());
// NOTE(review): if lk's scope (not shown) spans the MPI_Wait / polling below,
// the class-wide MPI mutex stays held for the whole wait -- confirm.
343 std::unique_lock<std::mutex> lk(mpi_mutex_);
344 int result = MPI_Cancel(&reqs_[buf]);
345 if (result == MPI_SUCCESS)
// Complete the cancelled request, either by waiting for it ...
350 MPI_Wait(&reqs_[buf], &status);
// ... or by testing it once and then re-testing in a bounded loop.
355 MPI_Test(&reqs_[buf], &doneFlag, &status);
// Up to 10 re-tests; sleep_time is 100000 (unit and the sleep call itself are
// not shown -- presumably microseconds).
358 size_t sleep_loops = 10;
359 size_t sleep_time = 100000;
360 for (
size_t idx = 0; idx < sleep_loops; ++idx)
363 MPI_Test(&reqs_[buf], &doneFlag, &status);
364 if (doneFlag) {
break; }
// Logged when the cancellation was not observed to complete (the condition is
// not shown).
368 TLOG_ERROR(uniqueLabel())
369 <<
"MPITransfer::cancelReq_: Timeout waiting to cancel the request for MPI buffer "
// MPI_Cancel failures are mapped to LogicError exceptions. The case labels
// for the second and third throws are not shown.
379 case MPI_ERR_REQUEST:
380 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
381 <<
"MPI_Cancel returned MPI_ERR_REQUEST.\n";
383 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
384 <<
"MPI_Cancel returned MPI_ERR_ARG.\n";
386 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
387 <<
"MPI_Cancel returned unknown error code.\n";
// Member function fragment; the return type and name lines are not part of
// this listing (presumably post_ -- confirm). Posts a non-blocking receive
// (MPI_Irecv) into payload_[buf]. The remaining MPI_Irecv arguments are on
// lines not shown.
393 artdaq::MPITransfer::
// Trace the buffer index, its size and header address before posting.
397 std::ostringstream debugstream;
398 debugstream <<
"Posting buffer " << buf
399 <<
" size=" << payload_[buf].size()
400 <<
" header address=0x" << std::hex << payload_[buf].headerAddress() << std::dec
402 TRACE(4, debugstream.str().c_str());
// The MPI call is made under the class-wide MPI mutex. The receive starts at
// the Fragment header; the count passed is size() * sizeof(Fragment::value_type).
406 std::unique_lock<std::mutex> lk(mpi_mutex_);
407 MPI_Irecv(&*payload_[buf].headerBegin(),
408 (payload_[buf].size() *
sizeof(Fragment::value_type)),
// findAvailable (fragment: the `do`, the use_me assignment and the loops
// increment are on lines not shown). Scans the request slots starting at pos_,
// testing one request per pass with MPI_Test and advancing pos_ with
// wrap-around at buffer_count_. The do/while ends when a tested request reports
// completion or when `loops` reaches buffer_count_. The return statement is
// not shown; the final trace reports use_me and loops.
416 int artdaq::MPITransfer::findAvailable()
421 TRACE(5,
"findAvailable initial pos_=%d", pos_);
// Each MPI_Test is made under the class-wide MPI mutex.
425 std::unique_lock<std::mutex> lk(mpi_mutex_);
426 MPI_Test(&reqs_[use_me], &flag, MPI_STATUS_IGNORE);
427 pos_ = (pos_ + 1) % buffer_count_;
429 }
while (!flag && loops < buffer_count_);
431 TRACE(5,
"findAvailable returning use_me=%d loops=%zu", use_me, loops);
// Body of a thin wrapper whose signature is not part of this listing
// (presumably moveFragment -- confirm). Forwards to sendFragment with
// force_async = false, so the configured synchronous_sends_ setting decides
// between MPI_Ssend and MPI_Isend.
441 return sendFragment(std::move(frag), send_timeout_usec,
false);
// Body of a thin wrapper whose signature is not part of this listing
// (presumably copyFragment -- confirm). Forwards to sendFragment with
// force_async = true.
// NOTE(review): if `frag` is an lvalue reference here, std::move lets
// sendFragment move from the caller's Fragment -- verify callers do not
// reuse it afterwards.
448 return sendFragment(std::move(frag), send_timeout_usec,
true);
// sendFragment (fragment: the return type, braces and several statements are
// not part of this listing). Moves `frag` into a free send buffer and
// transmits it with MPI.
//   frag               Fragment to send; moved into payload_[buffer_idx]
//   send_timeout_usec  only traced in the visible lines
//   force_async        when true, MPI_Isend is used regardless of
//                      synchronous_sends_
// Returns CopyStatus::kTimeout on the no-buffer path and CopyStatus::kSuccess
// at the end. Throws cet::exception("Unimplemented") for an oversized payload.
453 artdaq::MPITransfer::
454 sendFragment(Fragment&& frag,
size_t send_timeout_usec,
bool force_async)
// NOTE(review): the trace text names "copyFragmentTo", not sendFragment;
// looks like a leftover from an earlier function name.
456 TRACE(5,
"copyFragmentTo timeout unused: %zu", send_timeout_usec);
// Payloads larger than max_fragment_size_words_ are rejected.
457 if (frag.dataSize() > max_fragment_size_words_)
459 throw cet::exception(
"Unimplemented")
460 <<
"Currently unable to deal with overlarge fragment payload ("
463 << max_fragment_size_words_
467 TRACE(5,
"MPITransfer::sendFragment: Checking whether to force async mode...");
// End-of-data Fragments force asynchronous mode. The assignment itself is on
// a line not shown.
468 if (frag.type() == Fragment::EndOfDataFragmentType)
470 TRACE(5,
"MPITransfer::sendFragment: EndOfDataFragment detected. Forcing async mode");
473 TRACE(5,
"MPITransfer::sendFragment: Finding available buffer");
474 int buffer_idx = findAvailable();
// No free buffer (the condition is not shown): give up with kTimeout.
// NOTE(review): the message says "RECV_TIMEOUT" while the value returned is
// CopyStatus::kTimeout.
477 TRACE(TLVL_WARNING,
"MPITransfer::sendFragment: No buffers available! Returning RECV_TIMEOUT!");
478 return CopyStatus::kTimeout;
480 TRACE(5,
"MPITransfer::sendFragment: Swapping in fragment to send to buffer %d", buffer_idx);
// The buffer slot takes ownership of the Fragment; the MPI send below reads
// from this slot.
481 Fragment& curfrag = payload_[buffer_idx];
482 curfrag = std::move(frag);
483 TRACE(5,
"sendFragTo before send src=%d dest=%d seqID=%lu type=%d found_idx=%d"
484 , source_rank(), destination_rank(), curfrag.sequenceID(), curfrag.type(), buffer_idx);
// The MPI send is made under the class-wide MPI mutex.
485 std::unique_lock<std::mutex> lk(mpi_mutex_);
// Non-blocking send when synchronous sends are disabled or async is forced ...
486 if (!synchronous_sends_ || force_async)
489 TRACE(5,
"MPITransfer::sendFragment: Using MPI_Isend");
490 MPI_Isend(&*curfrag.headerBegin(),
491 curfrag.size() *
sizeof(Fragment::value_type),
// ... otherwise a synchronous send (MPI_Ssend).
505 TRACE(5,
"MPITransfer::sendFragment: Using MPI_Ssend");
506 MPI_Ssend(&*curfrag.headerBegin(),
507 curfrag.size() *
sizeof(Fragment::value_type),
513 TRACE(5,
"sendFragTo COMPLETE");
// Summary trace of what was handed to MPI.
516 std::ostringstream debugstream;
517 debugstream <<
"send COMPLETE: "
518 <<
" buffer_idx=" << buffer_idx
519 <<
" send_size=" << curfrag.size()
520 <<
" src=" << source_rank()
521 <<
" dest=" << destination_rank()
522 <<
" sequenceID=" << curfrag.sequenceID()
523 <<
" fragID=" << curfrag.fragmentID()
525 TRACE(11, debugstream.str().c_str());
527 return CopyStatus::kSuccess;
size_t buffer_count_
The number of Fragment transfers the TransferInterface can handle simultaneously. ...
virtual ~MPITransfer()
MPITransfer Destructor.
int receiveFragment(Fragment &frag, size_t timeout_usec) override
Receive a Fragment using MPI.
MPITransfer(fhicl::ParameterSet pset, Role role)
MPITransfer Constructor.
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
int destination_rank() const
Get the destination rank for this TransferInterface instance.
This TransferInterface is a Receiver.
int source_rank() const
Get the source rank for this TransferInterface instance.
CopyStatus moveFragment(Fragment &&frag, size_t timeout_usec) override
Move a Fragment to the destination.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
This interface defines the functions used to transfer data between artdaq applications.
MPITransfer is a TransferInterface implementation plugin that transfers data using MPI...
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
const size_t max_fragment_size_words_
The maximum size of the transferred Fragment objects, in artdaq::Fragment::RawDataType words...
CopyStatus copyFragment(Fragment &frag, size_t timeout_usec) override
Copy a Fragment to the destination. Forces asynchronous send.