1 #include "artdaq/TransferPlugins/MPITransfer.hh"
4 #include "canvas/Utilities/Exception.h"
5 #include "cetlib_except/exception.h"
7 #include "artdaq-core/Data/Fragment.hh"
// NOTE(review): this listing is a code-browser excerpt; the leading integer on
// each line is the ORIGINAL source line number, and gaps in that numbering mark
// elided lines. Comments below document only what the visible fragments show.
//
// Two distinct MPI tags keep the two messages of a transfer (fragment header,
// then fragment payload) from matching each other's receives.
19 #define MPI_TAG_HEADER 0x8E // 142
20 #define MPI_TAG_DATA 0xDA // 218
// Class-wide (static) mutex taken around every MPI call visible in this file
// (MPI_Issend, MPI_Irecv, MPI_Test, MPI_Cancel) -- presumably because the MPI
// implementation in use is not assumed thread-safe; shared by all instances.
23 std::mutex artdaq::MPITransfer::mpi_mutex_;
// MPITransfer constructor (signature elided from this excerpt).
// Two MPI_Request slots are reserved per buffer: an even slot for the header
// Issend and the following odd slot for the data Issend (see moveFragment).
27 , reqs_(2 * buffer_count_, MPI_REQUEST_NULL)
// One pre-allocated Fragment per buffer; a fragment being sent is moved here
// so its storage stays alive until the nonblocking sends complete.
28 , payload_(buffer_count_)
31 TLOG_TRACE(
uniqueLabel()) <<
"MPITransfer construction: "
// Configuration error if no buffers were configured (the guard condition
// itself is in the elided lines -- presumably buffer_count_ == 0; confirm).
38 throw art::Exception(art::errors::Configuration,
"MPITransfer: ")
39 <<
"No buffers configured.";
// Destructor: drain all outstanding nonblocking sends/receives before the
// request and payload vectors are destroyed.
46 TLOG_TRACE(uniqueLabel()) <<
"MPITransfer::~MPITransfer: BEGIN" << TLOG_ENDL;
47 TLOG_TRACE(uniqueLabel()) <<
"MPITransfer::~MPITransfer: Collecting requests that need to be waited on" << TLOG_ENDL;
// Gather only the requests still in flight (non-NULL handles).
48 std::vector<MPI_Request> reqs;
49 for (
size_t ii = 0; ii < reqs_.size(); ++ii)
51 if (reqs_[ii] != MPI_REQUEST_NULL)
53 reqs.push_back(reqs_[ii]);
58 TLOG_TRACE(uniqueLabel()) <<
"MPITransfer::~MPITransfer: Waiting on " << std::to_string(reqs.size()) <<
" reqs." << TLOG_ENDL;
// NOTE(review): if reqs is empty, &reqs[0] is undefined behavior -- prefer
// reqs.data(). An emptiness guard may exist in the elided lines (original
// 55-57 / 60-64); confirm before changing.
59 MPI_Waitall(reqs.size(), &reqs[0], MPI_STATUSES_IGNORE);
65 TLOG_TRACE(uniqueLabel()) <<
"MPITransfer::~MPITransfer: DONE" << TLOG_ENDL;
72 return moveFragment(std::move(frag), send_timeout_usec);
// moveFragment: stage the fragment in a free buffer slot and post two
// nonblocking synchronous sends (header, then payload). Returns kTimeout if
// no slot frees up within send_timeout_usec, kSuccess once both sends are
// posted (NOT completed -- completion is detected later via findAvailable).
//
// Warn (but proceed) when the fragment exceeds the configured maximum size;
// over-size fragments eat into the shared buffer space.
79 if (frag.dataSize() > max_fragment_size_words_)
81 TLOG_WARNING(uniqueLabel()) <<
"Fragment has size (" << frag.dataSize() <<
") larger than max_fragment_size_words_ (" << max_fragment_size_words_ <<
")."
82 <<
" Total buffer space is: " << max_fragment_size_words_ * buffer_count_ <<
" words. Multiple over-size Fragments will exhaust the buffer!" << TLOG_ENDL;
85 auto start_time = std::chrono::steady_clock::now();
87 TLOG_ARB(5, uniqueLabel()) <<
"MPITransfer::moveFragment: Finding available send slot, send_timeout_usec=" << std::to_string(send_timeout_usec) << TLOG_ENDL;
// Poll for a request-pair slot whose previous sends have completed.
88 auto req_idx = findAvailable();
90 while (req_idx == RECV_TIMEOUT && TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec)
93 req_idx = findAvailable();
// Periodic progress log while spinning (counter increment is in elided lines).
95 if (counter % 1000 == 0)
97 TLOG_INFO(uniqueLabel()) <<
"Rank " << source_rank() <<
" waiting for available buffer to " << destination_rank() <<
". "
98 <<
"Waited " << std::to_string(TimeUtils::GetElapsedTimeMilliseconds(start_time)) <<
" ms so far." << TLOG_ENDL;
// Timed out without a free slot: report and bail.
104 TLOG_WARNING(uniqueLabel()) <<
"MPITransfer::sendFragment: No buffers available! Returning RECV_TIMEOUT!" << TLOG_ENDL;
105 return CopyStatus::kTimeout;
108 TLOG_ARB(5, uniqueLabel()) <<
"MPITransfer::moveFragment send slot is " << req_idx << TLOG_ENDL;
// Requests are paired two-per-buffer, so the payload buffer index is half
// the (even) request index.
109 auto buffer_idx = req_idx / 2;
110 TLOG_ARB(5, uniqueLabel()) <<
"MPITransfer::moveFragment: Swapping in fragment to send to buffer " << buffer_idx << TLOG_ENDL;
// Move the fragment into the long-lived payload slot so its memory remains
// valid for the duration of the nonblocking sends.
111 Fragment& curfrag = payload_[buffer_idx];
112 curfrag = std::move(frag);
114 TLOG_ARB(5, uniqueLabel()) <<
"MPITransfer::moveFragment before send src=" << source_rank() <<
" dest=" << destination_rank() <<
" seqID=" << std::to_string(curfrag.sequenceID()) <<
" type=" << curfrag.typeString() <<
" found_idx=" << req_idx << TLOG_ENDL;
// Serialize MPI access across all MPITransfer instances.
116 std::unique_lock<std::mutex> lk(mpi_mutex_);
119 TLOG_ARB(5, uniqueLabel()) <<
"MPITransfer::moveFragment: Using MPI_Isend" << TLOG_ENDL;
// Post the header send (synchronous-mode nonblocking: completes only once
// the receiver has started matching it). Sizes are in bytes (words *
// sizeof(RawDataType), sent as MPI_BYTE).
121 MPI_Issend(curfrag.headerAddress(), detail::RawFragmentHeader::num_words() *
sizeof(RawDataType), MPI_BYTE, destination_rank(), MPI_TAG_HEADER, MPI_COMM_WORLD, &reqs_[req_idx]);
// Payload = everything after the header, posted on the paired odd request.
123 auto sizeWrds = curfrag.size() - detail::RawFragmentHeader::num_words();
124 auto offset = curfrag.headerAddress() + detail::RawFragmentHeader::num_words();
125 MPI_Issend(offset, sizeWrds *
sizeof(RawDataType), MPI_BYTE, destination_rank(), MPI_TAG_DATA, MPI_COMM_WORLD, &reqs_[req_idx + 1]);
126 TLOG_ARB(5, uniqueLabel()) <<
"MPITransfer::moveFragment COMPLETE" << TLOG_ENDL;
128 TLOG_ARB(11, uniqueLabel()) <<
"MPITransfer::moveFragment COMPLETE: "
129 <<
" buffer_idx=" << buffer_idx
130 <<
" send_size=" << curfrag.size()
131 <<
" src=" << source_rank()
132 <<
" dest=" << destination_rank()
133 <<
" sequenceID=" << curfrag.sequenceID()
134 <<
" fragID=" << curfrag.fragmentID() << TLOG_ENDL;
135 return CopyStatus::kSuccess;
// receiveFragmentHeader: post a nonblocking receive for a fragment header on
// MPI_TAG_HEADER and poll it with MPI_Test (the timeout_usec argument is
// explicitly ignored, per the log message). Returns the sending rank from
// status.MPI_SOURCE. The polling loop itself is in elided lines.
140 TLOG_ARB(6, uniqueLabel()) <<
"MPITransfer::receiveFragmentHeader entered tmo=" << std::to_string(timeout_usec) <<
" us (ignored)" << TLOG_ENDL;
142 int wait_result = MPI_SUCCESS;
// MPI calls are made under the class-wide mutex.
146 std::unique_lock<std::mutex> lk(mpi_mutex_);
// Receive directly into the caller's header; count is in bytes.
147 MPI_Irecv(&header, header.num_words() *
sizeof(RawDataType), MPI_BYTE, source_rank(), MPI_TAG_HEADER, MPI_COMM_WORLD, &req);
// Re-acquire the mutex per poll iteration (loop structure elided).
153 std::unique_lock<std::mutex> lk(mpi_mutex_);
154 wait_result = MPI_Test(&req, &flag, &status);
// After a successful MPI_Test completion, MPI sets the request back to
// MPI_REQUEST_NULL; anything else indicates an internal logic error.
161 if (req != MPI_REQUEST_NULL)
163 TLOG_ERROR(uniqueLabel()) <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentHeader." << TLOG_ENDL;
164 throw art::Exception(art::errors::LogicError,
"MPITransfer: ") <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentHeader.";
167 TLOG_ARB(8, uniqueLabel()) <<
"MPITransfer::receiveFragmentHeader recvd" << TLOG_ENDL;
// Diagnostic dump of the MPI status and the received header fields.
170 {TLOG_ARB(8, uniqueLabel()) <<
"MPITransfer::receiveFragmentHeader: " << my_rank
171 <<
" Wait_error=" << wait_result
172 <<
" status_error=" << status.MPI_ERROR
173 <<
" source=" << status.MPI_SOURCE
174 <<
" tag=" << status.MPI_TAG
175 <<
" Fragment_sequenceID=" << (uint64_t)header.sequence_id
176 <<
" Fragment_size=" << header.word_count
177 <<
" fragID=" << header.fragment_id << TLOG_ENDL;
// Error decoding (switch header elided); translates MPI error codes to text.
// NOTE(review): the log text says "Waitany" but the call above is MPI_Test.
179 char err_buffer[MPI_MAX_ERROR_STRING];
185 case MPI_ERR_IN_STATUS:
186 MPI_Error_string(status.MPI_ERROR, err_buffer, &resultlen);
187 TLOG_ERROR(uniqueLabel())
188 <<
"MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
191 MPI_Error_string(wait_result, err_buffer, &resultlen);
192 TLOG_ERROR(uniqueLabel())
193 <<
"MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
// Rank of the sender whose header was just received.
197 return status.MPI_SOURCE;
// receiveFragmentData: post a nonblocking receive for wordCount words of
// fragment payload on MPI_TAG_DATA into the caller-supplied buffer, then poll
// with MPI_Test (polling loop in elided lines). Mirrors
// receiveFragmentHeader; returns the sending rank.
202 TLOG_ARB(6, uniqueLabel()) <<
"MPITransfer::receiveFragmentData entered wordCount=" << std::to_string(wordCount) << TLOG_ENDL;
203 int wait_result = MPI_SUCCESS;
// MPI calls are made under the class-wide mutex.
208 std::unique_lock<std::mutex> lk(mpi_mutex_);
// Count is in bytes: words * sizeof(RawDataType), received as MPI_BYTE.
209 MPI_Irecv(destination, wordCount *
sizeof(RawDataType), MPI_BYTE, source_rank(), MPI_TAG_DATA, MPI_COMM_WORLD, &req);
// Re-acquire the mutex per poll iteration (loop structure elided).
215 std::unique_lock<std::mutex> lk(mpi_mutex_);
216 wait_result = MPI_Test(&req, &flag, &status);
// A completed MPI_Test must have reset the request to MPI_REQUEST_NULL.
222 if (req != MPI_REQUEST_NULL)
224 TLOG_ERROR(uniqueLabel()) <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentData." << TLOG_ENDL;
225 throw art::Exception(art::errors::LogicError,
"MPITransfer: ") <<
"INTERNAL ERROR: req is not MPI_REQUEST_NULL in receiveFragmentData.";
229 TLOG_ARB(8, uniqueLabel()) <<
"MPITransfer::receiveFragmentData recvd" << TLOG_ENDL;
// Error decoding (switch header elided). NOTE(review): log text says
// "Waitany" but the call above is MPI_Test.
232 char err_buffer[MPI_MAX_ERROR_STRING];
238 case MPI_ERR_IN_STATUS:
239 MPI_Error_string(status.MPI_ERROR, err_buffer, &resultlen);
240 TLOG_ERROR(uniqueLabel())
241 <<
"MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
244 MPI_Error_string(wait_result, err_buffer, &resultlen);
245 TLOG_ERROR(uniqueLabel())
246 <<
"MPITransfer: Waitany ERROR: " << err_buffer <<
"\n" << TLOG_ENDL;
// Rank of the sender whose payload was just received.
250 return status.MPI_SOURCE;
// cancelReq_: cancel one outstanding nonblocking request, then wait for it so
// the cancellation (or the operation itself) completes before returning.
// Takes the request BY VALUE, so the caller's handle in reqs_ is not updated
// here -- presumably reset elsewhere; confirm against callers.
254 artdaq::MPITransfer::
255 cancelReq_(MPI_Request req)
const
// Nothing to do for an already-completed/null request.
257 if (req == MPI_REQUEST_NULL)
return;
259 TLOG_ARB(8, uniqueLabel()) <<
"Cancelling post" << TLOG_ENDL;
// MPI calls are made under the class-wide mutex.
261 std::unique_lock<std::mutex> lk(mpi_mutex_);
262 int result = MPI_Cancel(&req);
263 if (result == MPI_SUCCESS)
// Per MPI semantics, a cancelled request must still be completed (here via
// MPI_Wait) to free it.
266 MPI_Wait(&req, &status);
// Failure paths (switch header elided): map MPI_Cancel error codes onto
// LogicError exceptions.
272 case MPI_ERR_REQUEST:
273 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
274 <<
"MPI_Cancel returned MPI_ERR_REQUEST.\n";
276 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
277 <<
"MPI_Cancel returned MPI_ERR_ARG.\n";
279 throw art::Exception(art::errors::LogicError,
"MPITransfer: ")
280 <<
"MPI_Cancel returned unknown error code.\n";
// findAvailable: scan the request array (two requests per buffer, starting at
// the round-robin cursor pos_) for a slot whose header AND data sends have
// both completed, testing each pair with MPI_Test. Loop body/initialization
// are partially elided; returns use_me on success (per the log below) --
// presumably RECV_TIMEOUT when no pair frees up within buffer_count_ loops,
// as moveFragment checks for that value.
285 int artdaq::MPITransfer::findAvailable()
// MPI calls are made under the class-wide mutex.
294 std::unique_lock<std::mutex> lk(mpi_mutex_);
// Test the even (header) request, then its odd (data) partner.
295 MPI_Test(&reqs_[use_me], &flag, MPI_STATUS_IGNORE);
297 MPI_Test(&reqs_[use_me + 1], &flag2, MPI_STATUS_IGNORE);
// Advance the cursor by a full pair, wrapping around the array.
299 pos_ = (pos_ + 2) % reqs_.size();
301 }
while (!flag2 && loops < buffer_count_);
303 TLOG_ARB(5, uniqueLabel()) <<
"findAvailable returning use_me=" << use_me <<
" loops=" << std::to_string(loops) << TLOG_ENDL;
size_t buffer_count_
The number of Fragment transfers the TransferInterface can handle simultaneously. ...
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
virtual ~MPITransfer()
MPITransfer Destructor.
CopyStatus moveFragment(Fragment &&frag, size_t timeout_usec=std::numeric_limits< size_t >::max()) override
Move a Fragment to the destination.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
MPITransfer(fhicl::ParameterSet pset, Role role)
MPITransfer Constructor.
static const int RECV_TIMEOUT
Value returned upon receive timeout. Because receivers otherwise return the source rank, this value also serves as the upper limit on the number of ranks that artdaq currently supports.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
CopyStatus copyFragment(Fragment &frag, size_t timeout_usec=std::numeric_limits< size_t >::max()) override
Copy a Fragment to the destination. Forces asynchronous send.
std::string uniqueLabel() const
Get the unique label of this TransferInterface instance.
This interface defines the functions used to transfer data between artdaq applications.
virtual int destination_rank() const
Get the destination rank for this TransferInterface instance.
MPITransfer is a TransferInterface implementation plugin that transfers data using MPI...
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.