artdaq  3.13.00
Bundle_transfer.cc
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>

#include "artdaq/DAQdata/Globals.hh"
#define TRACE_NAME (app_name + "_BundleTransfer").c_str()

#include "artdaq-core/Data/ContainerFragmentLoader.hh"
#include "artdaq/TransferPlugins/TCPSocketTransfer.hh"
#include "artdaq/TransferPlugins/TransferInterface.hh"

#include <boost/thread.hpp>

namespace artdaq {
/**
 * \brief The BundleTransfer TransferInterface plugin bundles outgoing Fragments into
 * ContainerFragments and sends them using an underlying TCPSocketTransfer
 */
class BundleTransfer : public TransferInterface
{
public:
    /**
     * \brief BundleTransfer Constructor
     * \param pset ParameterSet used to configure this TransferInterface
     * \param role Role of this TransferInterface (send or receive)
     */
    BundleTransfer(const fhicl::ParameterSet& pset, Role role);

    /**
     * \brief BundleTransfer default Destructor
     */
    ~BundleTransfer() override;

    /**
     * \brief Receive a Fragment, using the underlying transfer plugin
     * \param fragment Output Fragment
     * \param receiveTimeout Timeout for the receive, in microseconds
     * \return Rank of the sender, or a value less than RECV_SUCCESS on failure
     */
    int receiveFragment(artdaq::Fragment& fragment,
                        size_t receiveTimeout) override
    {
        if (bundle_fragment_ == nullptr)
        {
            receive_bundle_fragment_(receiveTimeout);
            if (current_rank_ < RECV_SUCCESS) return current_rank_;
        }

        ContainerFragment cf(*bundle_fragment_);
        TLOG(TLVL_DEBUG + 32) << GetTraceName() << "Retrieving Fragment " << (current_block_index_ + 1) << " of " << cf.block_count();
        // fragSize(i) includes the contained Fragment's header; resizeBytes expects the payload size only
        fragment.resizeBytes(cf.fragSize(current_block_index_) - sizeof(detail::RawFragmentHeader));
        memcpy(fragment.headerAddress(), static_cast<const uint8_t*>(cf.dataBegin()) + cf.fragmentIndex(current_block_index_), cf.fragSize(current_block_index_));
        current_block_index_++;
        if (current_block_index_ >= cf.block_count())  // Index vs. count: all blocks consumed, fetch a new bundle next call
        {
            bundle_fragment_.reset(nullptr);
        }
        return current_rank_;
    }

    /**
     * \brief Receive a Fragment Header from the transport mechanism
     * \param header Output RawFragmentHeader
     * \param receiveTimeout Timeout for the receive, in microseconds
     * \return Rank of the sender, or a value less than RECV_SUCCESS on failure
     */
    int receiveFragmentHeader(detail::RawFragmentHeader& header, size_t receiveTimeout) override
    {
        if (bundle_fragment_ == nullptr)
        {
            receive_bundle_fragment_(receiveTimeout);
            if (current_rank_ < RECV_SUCCESS) return current_rank_;
        }
        ContainerFragment cf(*bundle_fragment_);
        TLOG(TLVL_DEBUG + 32) << GetTraceName() << "Retrieving Fragment Header " << (current_block_index_ + 1) << " of " << cf.block_count();
        memcpy(&header, static_cast<const uint8_t*>(cf.dataBegin()) + cf.fragmentIndex(current_block_index_), sizeof(detail::RawFragmentHeader));
        // current_block_index_ advances in receiveFragmentData, once the matching body has been read
        return current_rank_;
    }

    /**
     * \brief Receive the body of a Fragment to the given destination pointer
     * \param destination Pointer to the memory region receiving the Fragment body
     * \return Rank of the sender, or RECV_TIMEOUT. For code clarity, things checking for
     * successful receive should check retval >= NO_RANK_INFO.
     */
    int receiveFragmentData(RawDataType* destination, size_t /*wordCount*/) override
    {
        if (bundle_fragment_ == nullptr)  // Should be impossible: receiveFragmentHeader fills the bundle first
        {
            return RECV_TIMEOUT;
        }
        ContainerFragment cf(*bundle_fragment_);
        TLOG(TLVL_DEBUG + 32) << GetTraceName() << "Retrieving Fragment Data " << (current_block_index_ + 1) << " of " << cf.block_count();
        // Skip the contained Fragment's header, which was already delivered by receiveFragmentHeader
        memcpy(destination, static_cast<const uint8_t*>(cf.dataBegin()) + cf.fragmentIndex(current_block_index_) + sizeof(detail::RawFragmentHeader), cf.fragSize(current_block_index_) - sizeof(detail::RawFragmentHeader));
        current_block_index_++;
        if (current_block_index_ >= cf.block_count())  // Index vs. count: all blocks consumed, fetch a new bundle next call
        {
            bundle_fragment_.reset(nullptr);
        }
        return current_rank_;
    }

    /**
     * \brief Send a Fragment in non-reliable mode, using the underlying transfer plugin
     * \param fragment The Fragment to send
     * \param send_timeout_usec Timeout for the send, in microseconds
     * \return CopyStatus describing the result of the buffering operation
     */
    CopyStatus transfer_fragment_min_blocking_mode(artdaq::Fragment const& fragment, size_t send_timeout_usec) override
    {
        TLOG(TLVL_DEBUG + 35) << GetTraceName() << "transfer_fragment_min_blocking_mode START";
        last_send_call_reliable_ = false;
        last_send_timeout_usec_ = send_timeout_usec;
        {
            std::unique_lock<std::mutex> lk(fragment_mutex_);
            if (current_buffer_size_bytes_ > max_hold_size_bytes_)
            {
                fragment_cv_.wait_for(lk, std::chrono::microseconds(send_timeout_usec), [&] { return current_buffer_size_bytes_ < max_hold_size_bytes_; });
            }

            if (current_buffer_size_bytes_ > max_hold_size_bytes_)
            {
                TLOG(TLVL_WARNING) << GetTraceName() << "Dropping data due to timeout in min_blocking_mode";
                return CopyStatus::kTimeout;
            }

            TLOG(TLVL_DEBUG + 35) << GetTraceName() << "transfer_fragment_min_blocking_mode after wait for buffer";
            // Always send along System Fragments immediately
            if (Fragment::isSystemFragmentType(fragment.type()))
            {
                system_fragment_cached_ = true;
            }

            current_buffer_size_bytes_ += fragment.sizeBytes();
            // Eww, we have to copy (the const reference cannot be moved from)
            fragment_buffer_.emplace_back(fragment);
        }
        TLOG(TLVL_DEBUG + 35) << GetTraceName() << "transfer_fragment_min_blocking_mode END";
        return CopyStatus::kSuccess;  // Might be a lie; the actual send happens later, from the send timeout thread
    }

    /**
     * \brief Send a Fragment in reliable mode, using the underlying transfer plugin
     * \param fragment The Fragment to send
     * \return CopyStatus describing the result of the buffering operation
     */
    CopyStatus transfer_fragment_reliable_mode(artdaq::Fragment&& fragment) override
    {
        TLOG(TLVL_DEBUG + 36) << GetTraceName() << "transfer_fragment_reliable_mode START";
        last_send_call_reliable_ = true;
        {
            std::unique_lock<std::mutex> lk(fragment_mutex_);
            if (current_buffer_size_bytes_ > max_hold_size_bytes_)
            {
                // Reliable mode never drops data: wait without a timeout for buffer space
                fragment_cv_.wait(lk, [&] { return current_buffer_size_bytes_ < max_hold_size_bytes_; });
            }

            TLOG(TLVL_DEBUG + 36) << GetTraceName() << "transfer_fragment_reliable_mode after wait for buffer";

            // Always send along System Fragments immediately
            if (Fragment::isSystemFragmentType(fragment.type()))
            {
                system_fragment_cached_ = true;
            }

            current_buffer_size_bytes_ += fragment.sizeBytes();
            fragment_buffer_.emplace_back(std::move(fragment));
        }
        TLOG(TLVL_DEBUG + 36) << GetTraceName() << "transfer_fragment_reliable_mode END";
        return CopyStatus::kSuccess;  // Might be a lie; the actual send happens later, from the send timeout thread
    }

    /**
     * \brief Determine whether the TransferInterface plugin is able to send/receive data
     * \return True if the plugin is running
     */
    bool isRunning() override { return running_; }

    /**
     * \brief Flush any in-flight data. This should be used by the receiver after the receive loop has ended.
     */
    void flush_buffers() override { theTransfer_->flush_buffers(); }

private:
    BundleTransfer(BundleTransfer const&) = delete;
    BundleTransfer(BundleTransfer&&) = delete;
    BundleTransfer& operator=(BundleTransfer const&) = delete;
    BundleTransfer& operator=(BundleTransfer&&) = delete;

private:
    std::unique_ptr<TransferInterface> theTransfer_;
    size_t send_threshold_bytes_;           ///< Send the current bundle once this many bytes are buffered
    size_t max_hold_size_bytes_;            ///< Block (or, in min-blocking mode, drop) incoming Fragments above this buffer size
    int max_hold_time_us_;                  ///< Send the current bundle after this long, even below the size threshold
    FragmentPtr bundle_fragment_{nullptr};  ///< ContainerFragment being assembled (sender) or unpacked (receiver)
    Fragments fragment_buffer_;
    size_t current_block_index_{0};
    int current_rank_ = 0;

    std::chrono::steady_clock::time_point send_fragment_started_;
    std::atomic<size_t> current_buffer_size_bytes_{0};
    std::unique_ptr<boost::thread> send_timeout_thread_;
    std::atomic<bool> system_fragment_cached_{false};
    std::atomic<bool> send_timeout_thread_running_{false};
    std::atomic<bool> last_send_call_reliable_{true};
    std::atomic<size_t> last_send_timeout_usec_{1000000};
    std::atomic<bool> running_{true};
    std::mutex fragment_mutex_;
    std::condition_variable fragment_cv_;

    bool check_send_(bool force);
    void start_timeout_thread_();
    void send_timeout_thread_proc_();
    bool send_bundle_fragment_(bool forceSend = false);
    void receive_bundle_fragment_(size_t receiveTimeout);
};
}  // namespace artdaq

artdaq::BundleTransfer::BundleTransfer(const fhicl::ParameterSet& pset, Role role)
    : TransferInterface(pset, role)
    , send_threshold_bytes_(pset.get<size_t>("send_threshold_bytes", 10 * 0x100000))   // 10 MB
    , max_hold_size_bytes_(pset.get<size_t>("max_hold_size_bytes", 1000 * 0x100000))   // 1000 MB
    , max_hold_time_us_(pset.get<int>("max_hold_time_us", 100000))                     // 0.1 s
{
    TLOG(TLVL_INFO) << GetTraceName() << "Begin BundleTransfer constructor";
    TLOG(TLVL_INFO) << GetTraceName() << "Constructing TCPSocketTransfer";
    theTransfer_ = std::make_unique<TCPSocketTransfer>(pset, role);

    if (role == Role::kSend)
    {
        start_timeout_thread_();
    }
}
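
// Illustrative FHiCL configuration for this plugin. The block label
// "my_bundle_transfer" is a placeholder, and the values shown are simply the
// defaults read by the constructor above; parameters for the underlying
// TCPSocketTransfer are passed through via the same ParameterSet.
//
//   my_bundle_transfer: {
//     transferPluginType: Bundle       # selects this plugin (see DEFINE_ARTDAQ_TRANSFER below)
//     send_threshold_bytes: 10485760   # 10 MB: send once this much is buffered
//     max_hold_size_bytes: 1048576000  # 1000 MB: block (or drop) senders above this
//     max_hold_time_us: 100000         # 0.1 s: send buffered data at least this often
//   }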

artdaq::BundleTransfer::~BundleTransfer()
{
    if (role_ == Role::kSend)
    {
        send_timeout_thread_running_ = false;
        if (send_timeout_thread_ && send_timeout_thread_->joinable())
        {
            send_timeout_thread_->join();
        }
        send_bundle_fragment_(true);  // Force out anything still buffered
    }
    running_ = false;
}

void artdaq::BundleTransfer::start_timeout_thread_()
{
    if (send_timeout_thread_ && send_timeout_thread_->joinable())
    {
        send_timeout_thread_->join();
    }
    send_timeout_thread_running_ = true;
    TLOG(TLVL_INFO) << GetTraceName() << "Starting Send Timeout Thread";

    try
    {
        send_timeout_thread_ = std::make_unique<boost::thread>(&BundleTransfer::send_timeout_thread_proc_, this);
        char tname[16];                                            // Size 16 - see man page pthread_setname_np(3) and/or prctl(2)
        snprintf(tname, sizeof(tname) - 1, "%d-SNDTMO", my_rank);  // NOLINT
        tname[sizeof(tname) - 1] = '\0';                           // assure term. snprintf is not too evil :)
        auto handle = send_timeout_thread_->native_handle();
        pthread_setname_np(handle, tname);
    }
    catch (const boost::exception& e)
    {
        TLOG(TLVL_ERROR) << GetTraceName() << "Caught boost::exception starting Send Timeout thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
        std::cerr << GetTraceName() << "Caught boost::exception starting Send Timeout thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
        exit(5);
    }
}

void artdaq::BundleTransfer::send_timeout_thread_proc_()
{
    while (send_timeout_thread_running_)
    {
        if (!send_bundle_fragment_())
        {
            usleep(5000);  // Nothing was ready to send; poll again in 5 ms
        }
    }
}

bool artdaq::BundleTransfer::check_send_(bool force)
{
    if (force)
    {
        TLOG(TLVL_DEBUG + 37) << GetTraceName() << "check_send_: Send is forced, returning true";
        return true;
    }

    if (system_fragment_cached_.load())
    {
        TLOG(TLVL_DEBUG + 37) << GetTraceName() << "check_send_: System Fragment in cache, returning true";
        return true;
    }

    if (std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - send_fragment_started_).count() >= max_hold_time_us_)
    {
        TLOG(TLVL_DEBUG + 37) << GetTraceName() << "check_send_: Send timeout reached, returning true";
        return true;
    }

    if (current_buffer_size_bytes_ >= send_threshold_bytes_)
    {
        TLOG(TLVL_DEBUG + 37) << GetTraceName() << "check_send_: Buffer is full, returning true";
        return true;
    }

    TLOG(TLVL_DEBUG + 37) << GetTraceName() << "check_send_: returning false";
    return false;
}

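// Worked example of the triggers above, assuming the default configuration
// (send_threshold_bytes = 10 MB, max_hold_time_us = 0.1 s); the input rates are
// illustrative, not taken from this file:
//  - at 200 MB/s of input, the 10 MB threshold is crossed every ~50 ms, so
//    bundles go out on the size trigger;
//  - at 1 MB/s, only ~100 KB accumulates within 0.1 s, so bundles go out on
//    the hold-time trigger instead.
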
bool artdaq::BundleTransfer::send_bundle_fragment_(bool forceSend)
{
    {
        std::unique_lock<std::mutex> lk(fragment_mutex_);

        bool send_fragment = check_send_(forceSend);

        if (send_fragment && fragment_buffer_.size() > 0)
        {
            TLOG(TLVL_DEBUG + 38) << GetTraceName() << "Swapping in new buffer";
            Fragments temp_buffer;
            size_t size = current_buffer_size_bytes_;
            fragment_buffer_.swap(temp_buffer);
            send_fragment_started_ = std::chrono::steady_clock::now();
            current_buffer_size_bytes_ = 0;
            lk.unlock();
            TLOG(TLVL_DEBUG + 38) << GetTraceName() << "Notifying waiters";
            fragment_cv_.notify_one();

            TLOG(TLVL_DEBUG + 38) << GetTraceName() << "Setting up Bundle Fragment";
            bundle_fragment_.reset(new artdaq::Fragment(temp_buffer.front().sequenceID() + 1, temp_buffer.front().fragmentID()));
            bundle_fragment_->setTimestamp(temp_buffer.front().timestamp());
            bundle_fragment_->reserve(size / sizeof(artdaq::RawDataType));

            TLOG(TLVL_DEBUG + 38) << GetTraceName() << "Filling Bundle Fragment";
            ContainerFragmentLoader container_fragment(*bundle_fragment_);
            container_fragment.set_missing_data(false);  // Buffer mode is never missing data, even if there IS no data.
            container_fragment.addFragments(temp_buffer, true);
            temp_buffer.clear();

            TLOG(TLVL_DEBUG + 38) << GetTraceName() << "Sending Fragment, reliable mode " << last_send_call_reliable_.load();
            CopyStatus sts = CopyStatus::kSuccess;
            if (last_send_call_reliable_)
            {
                sts = theTransfer_->transfer_fragment_reliable_mode(std::move(*bundle_fragment_.get()));
                bundle_fragment_.reset(nullptr);
            }
            else
            {
                // Retry until the send succeeds or the plugin is shut down. A do/while is
                // required here: a plain while loop would never execute, since sts starts
                // out as kSuccess.
                do
                {
                    sts = theTransfer_->transfer_fragment_min_blocking_mode(*bundle_fragment_.get(), last_send_timeout_usec_);
                } while (sts != CopyStatus::kSuccess && send_timeout_thread_running_);
                bundle_fragment_.reset(nullptr);
            }

            if (sts != CopyStatus::kSuccess)
            {
                auto sts_string = sts == CopyStatus::kTimeout ? "timeout" : "other error";
                TLOG(TLVL_WARNING) << GetTraceName() << "Transfer of Bundle fragment returned status " << sts_string;
            }

            TLOG(TLVL_DEBUG + 38) << GetTraceName() << "Done sending Bundle Fragment";

            return true;  // A send was attempted; its status is logged above
        }
    }
    return false;  // Waiting on more data
}

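// Rough layout of the bundle produced above, as unpacked by the receive-side
// methods earlier in this file. Offsets come from the ContainerFragment index;
// this is a sketch of the container semantics, not a byte-exact map:
//
//   bundle_fragment_ payload (ContainerFragment):
//     dataBegin() + fragmentIndex(0) -> [ RawFragmentHeader | data ]   block 0
//     dataBegin() + fragmentIndex(1) -> [ RawFragmentHeader | data ]   block 1
//     ...                                                   block_count() blocks
//   fragSize(i) is the full size of block i, including its RawFragmentHeader.
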
void artdaq::BundleTransfer::receive_bundle_fragment_(size_t receiveTimeout)
{
    std::lock_guard<std::mutex> lk(fragment_mutex_);
    bundle_fragment_.reset(new artdaq::Fragment(1));

    TLOG(TLVL_DEBUG + 34) << GetTraceName() << "Going to receive next bundle fragment";
    current_rank_ = theTransfer_->receiveFragment(*bundle_fragment_, receiveTimeout);
    TLOG(TLVL_DEBUG + 34) << GetTraceName() << "Done with receiveFragment, current_rank_ = " << current_rank_;

    if (current_rank_ < RECV_SUCCESS)
    {
        bundle_fragment_.reset(nullptr);
    }
    current_block_index_ = 0;
}

DEFINE_ARTDAQ_TRANSFER(artdaq::BundleTransfer)
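
// Minimal receive-side usage sketch (illustrative assumptions: the plugin label
// "my_bundle_transfer", the 100000 us timeout, and instantiation through
// artdaq::MakeTransferPlugin; error handling omitted). Each call hands back one
// of the Fragments that was packed into the current bundle:
//
//   auto xfer = artdaq::MakeTransferPlugin(pset, "my_bundle_transfer",
//                                          artdaq::TransferInterface::Role::kReceive);
//   artdaq::Fragment frag;
//   int rank = xfer->receiveFragment(frag, 100000);
//   if (rank >= RECV_SUCCESS)
//   {
//       // frag is one contained Fragment; subsequent calls drain the rest of
//       // the bundle before a new one is fetched from the underlying transfer.
//   }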