artdaq  v3_02_01
TCPSocket_transfer.cc
1 // Sep 14, 2016. "TERMS AND CONDITIONS" governing this file are in the README
2 // or COPYING file. If you do not have such a file, one can be obtained by
3 // contacting Ron or Fermi Lab in Batavia IL, 60510, phone: 630-840-3000.
4 // $RCSfile: .emacs.gnu,v $
5 // rev="$Revision: 1.30 $$Date: 2016/03/01 14:27:27 $";
6 
7 // C Includes
8 #include <stdlib.h> // atoi, strtoul
9 #include <sys/socket.h> // socket, socklen_t
10 #include <sys/un.h> // sockaddr_un
11 #include <arpa/inet.h> // ntohl, ntohs
12 #include <sys/types.h> // size_t
13 #include <poll.h> // struct pollfd
14 
15 // C++ Includes
16 #include <string>
17 #include <fstream>
18 #include <stdexcept>
19 
20 // product Includes
21 #define TRACE_NAME "TCPSocketTransfer"
22 #include "artdaq/DAQdata/Globals.hh"
23 
24 // artdaq Includes
25 #include "artdaq/TransferPlugins/TCPSocketTransfer.hh"
28 #include "artdaq/TransferPlugins/detail/Timeout.hh"
29 #include "artdaq/TransferPlugins/detail/SRSockets.hh"
30 #include "artdaq-core/Data/Fragment.hh"
31 #include "artdaq-core/Utilities/TimeUtils.hh"
32 #include <iomanip>
33 
34 std::atomic<int> artdaq::TCPSocketTransfer::listen_thread_refcount_(0);
35 std::unique_ptr<boost::thread> artdaq::TCPSocketTransfer::listen_thread_ = nullptr;
36 std::map<int, std::set<int>> artdaq::TCPSocketTransfer::connected_fds_ = std::map<int, std::set<int>>();
37 std::mutex artdaq::TCPSocketTransfer::listen_thread_mutex_;
38 std::mutex artdaq::TCPSocketTransfer::connected_fd_mutex_;
39 
41 TCPSocketTransfer(fhicl::ParameterSet const& pset, TransferInterface::Role role)
42  : TransferInterface(pset, role)
43  , send_fd_(-1)
44  , active_receive_fd_(-1)
45  , last_active_receive_fd_(-1)
46  , rcvbuf_(pset.get<size_t>("tcp_receive_buffer_size", 0))
47  , sndbuf_(max_fragment_size_words_ * sizeof(artdaq::RawDataType) * buffer_count_)
48  , send_retry_timeout_us_(pset.get<size_t>("send_retry_timeout_us", 1000000))
49  , timeoutMessageArmed_(true)
50  , not_connected_count_(0)
51  , receive_err_threshold_(pset.get<size_t>("receive_socket_disconnected_max_count", 1000))
52  , receive_err_wait_us_(pset.get<size_t>("receive_socket_disconnected_wait_us", 10000))
53 {
54  TLOG(TLVL_DEBUG) << GetTraceName() << " Constructor: pset=" << pset.to_string() << ", role=" << (role == TransferInterface::Role::kReceive ? "kReceive" : "kSend");
55  auto masterPortOffset = pset.get<int>("offset_all_ports", 0);
56  hostMap_ = MakeHostMap(pset, masterPortOffset);
57 
59  {
60  // Wait for sender to connect...
61  TLOG(TLVL_DEBUG) << GetTraceName() << ": Listening for connections";
62  start_listen_thread_();
63  TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Listening";
64  }
65  else
66  {
67  TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting to destination";
68  connect_();
69  TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Connecting";
70  }
71  TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Constructor";
72 }
73 
74 artdaq::TCPSocketTransfer::~TCPSocketTransfer() noexcept
75 {
76  TLOG(TLVL_DEBUG) << GetTraceName() << ": Shutting down TCPSocketTransfer";
77 
78  if (role() == TransferInterface::Role::kSend)
79  {
80  // close all open connections (send stop_v0) first
81  MessHead mh = { 0,MessHead::stop_v0,htons(TransferInterface::source_rank()),{0} };
82  if (send_fd_ != -1)
83  {
84  // should be blocking with modest timeo
85  timeval tv = { 0,100000 };
86  socklen_t len = sizeof(tv);
87  setsockopt(send_fd_, SOL_SOCKET, SO_SNDTIMEO, &tv, len);
88  write(send_fd_, &mh, sizeof(mh));
89  }
90  close(send_fd_);
91  }
92  else
93  {
94  {
95  std::unique_lock<std::mutex> fd_lock(connected_fd_mutex_);
96  if (connected_fds_.count(source_rank()))
97  {
98  auto it = connected_fds_[source_rank()].begin();
99  while (it != connected_fds_[source_rank()].end())
100  {
101  close(*it);
102  it = connected_fds_[source_rank()].erase(it);
103  }
104  connected_fds_.erase(source_rank());
105  }
106  }
107 
108  std::unique_lock<std::mutex> lk(listen_thread_mutex_);
109  listen_thread_refcount_--;
110  if (listen_thread_refcount_ == 0 && listen_thread_ && listen_thread_->joinable())
111  {
112  listen_thread_->join();
113  }
114  }
115  TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Destructor";
116 }
117 
118 int artdaq::TCPSocketTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t timeout_usec)
119 {
120  TLOG(5) << GetTraceName() << ": receiveFragmentHeader: BEGIN";
121  int ret_rank = RECV_TIMEOUT;
122 
123  if (getConnectedFDCount(source_rank()) == 0)
124  { // what if just listen_fd???
125  if (++not_connected_count_ > receive_err_threshold_) { return DATA_END; }
126  TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Receive socket not connected, returning RECV_TIMEOUT";
127  usleep(receive_err_wait_us_);
128  return RECV_TIMEOUT;
129  }
130  not_connected_count_ = 0;
131 
132  TLOG(5) << GetTraceName() << ": receiveFragmentHeader timeout_usec=" << timeout_usec;
133  //void* buff=alloca(max_fragment_size_words_*8);
134  size_t byte_cnt = 0;
135  int sts;
136  int offset = 0;
137  SocketState state = SocketState::Metadata;
138  int target_bytes = sizeof(MessHead);
139  uint64_t start_time_us = TimeUtils::gettimeofday_us();
140 
141  //while (active_receive_fd_ != -1)
142  //{
143  // TLOG(TLVL_TRACE) << GetTraceName() << ": Currently receiving from fd " << active_receive_fd_ << ", waiting!";
144  // usleep(1000);
145  //}
146 
147 
148  uint8_t* buff;
149 
150  int timeout_ms;
151  if (timeout_usec == 0)
152  timeout_ms = 0;
153  else
154  timeout_ms = (timeout_usec + 999) / 1000; // want at least 1 ms
155 
156  bool done = false;
157  while (!done && getConnectedFDCount(source_rank()) > 0)
158  {
159  if (active_receive_fd_ == -1)
160  {
161  size_t fd_count = 0;
162  std::vector<pollfd> pollfds;
163  {
164  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
165  fd_count = connected_fds_[source_rank()].size();
166  pollfds.resize(fd_count);
167  auto iter = connected_fds_[source_rank()].begin();
168  for (size_t ii = 0; ii < fd_count; ++ii)
169  {
170  pollfds[ii].events = POLLIN | POLLPRI | POLLERR;
171  pollfds[ii].fd = *iter;
172  ++iter;
173  }
174  }
175  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragment: Polling fd to see if there's data" ;
176  int num_fds_ready = poll(&pollfds[0], fd_count, timeout_ms);
177  if (num_fds_ready <= 0)
178  {
179  TLOG(5) << GetTraceName() << ": receiveFragmentHeader: No data on receive socket, returning RECV_TIMEOUT";
180  return RECV_TIMEOUT;
181  }
182 
183  size_t index = 0;
184  if (last_active_receive_fd_ != -1)
185  {
186  for (auto& pollfd : pollfds)
187  {
188  index++;
189  if (pollfd.fd == last_active_receive_fd_)
190  {
191  break;
192  }
193  }
194  }
195 
196  int active_index = -1;
197  short anomolous_events = 0;
198  for (size_t ii = index; ii < index + pollfds.size(); ++ii)
199  {
200  auto pollfd_index = (ii + index) % pollfds.size();
201  if (pollfds[pollfd_index].revents & (POLLIN | POLLPRI))
202  {
203  active_index = pollfd_index;
204  active_receive_fd_ = pollfds[active_index].fd;
205  break;
206  }
207  else if (pollfds[pollfd_index].revents & (POLLHUP | POLLERR))
208  {
209  disconnect_receive_socket_(pollfds[pollfd_index].fd, "Poll returned POLLHUP or POLLERR, indicating problems with the sender.");
210  continue;
211  }
212  else if (pollfds[pollfd_index].revents & (POLLNVAL))
213  {
214  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: FD is closed, most likely because the peer went away. Removing from fd list.";
215  disconnect_receive_socket_(pollfds[pollfd_index].fd, "FD is closed, most likely because the peer went away.");
216  continue;
217  }
218  else if (pollfds[pollfd_index].revents)
219  {
220  anomolous_events |= pollfds[pollfd_index].revents;
221  }
222  }
223 
224  if (active_index == -1)
225  {
226  if (anomolous_events)
227  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Wrong event received from a pollfd. Mask: " << static_cast<int>(anomolous_events);
228  active_receive_fd_ = -1;
229  continue;
230  }
231 
232  if (!done && timeout_usec > 0)
233  {
234  // calc next timeout_ms (unless timed out)
235  size_t delta_us = TimeUtils::gettimeofday_us() - start_time_us;
236  if (delta_us > timeout_usec)
237  {
238  return RECV_TIMEOUT;
239  }
240  timeout_ms = ((timeout_usec - delta_us) + 999) / 1000; // want at least 1 ms
241  }
242  }
243 
244  if (state == SocketState::Metadata)
245  {
246  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Reading Message Header" ;
247  buff = &(mha[offset]);
248  byte_cnt = sizeof(MessHead) - offset;
249  }
250  else
251  {
252  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Reading data" ;
253  buff = reinterpret_cast<uint8_t*>(&header) + offset;
254  byte_cnt = mh.byte_count - offset;
255  }
256 
257  if (byte_cnt > 0)
258  {
259  TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Reading " << byte_cnt << " bytes from socket";
260  sts = read(active_receive_fd_, buff, byte_cnt);
261  TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Done with read";
262  }
263 
264  TLOG(7) << GetTraceName() << ": receiveFragmentHeader state=" << static_cast<int>(state) << " read=" << sts;
265  if (sts < 0)
266  {
267  TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: Error on receive, closing socket " << " (errno=" << errno << ": " << strerror(errno) << ")";
268  active_receive_fd_ = disconnect_receive_socket_(active_receive_fd_);
269  }
270  else
271  {
272  // see if we're done (with this state)
273  sts = offset += sts;
274  if (sts >= target_bytes)
275  {
276  TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Target read bytes reached. Changing state";
277  offset = 0;
278  if (state == SocketState::Metadata)
279  {
280  state = SocketState::Data;
281  mh.byte_count = ntohl(mh.byte_count);
282  mh.source_id = ntohs(mh.source_id);
283  target_bytes = mh.byte_count;
284 
285  if (mh.message_type == MessHead::stop_v0)
286  {
287  active_receive_fd_ = disconnect_receive_socket_(active_receive_fd_, "Stop Message received.");
288  }
289 
290  if (target_bytes == 0)
291  {
292  //Probably a stop_v0, return timeout so we can try again.
293  return RECV_TIMEOUT;
294  }
295  }
296  else
297  {
298  ret_rank = source_rank();
299  TLOG(8) << GetTraceName() << ": receiveFragmentHeader done sts=" << sts << " src=" << ret_rank;
300  TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Done receiving fragment header. Moving into output.";
301 
302  done = true; // no more polls
303  break; // no more read of ready fds
304  }
305  }
306  }
307 
308  } // while(!done)...poll
309 
310  TLOG(5) << GetTraceName() << ": receiveFragmentHeader: Returning " << ret_rank;
311  return ret_rank;
312 }
313 
314 int artdaq::TCPSocketTransfer::disconnect_receive_socket_(int fd, std::string msg)
315 {
316  TLOG(TLVL_DEBUG) << GetTraceName() << ": disconnect_receive_socket_: " << msg << " Closing socket " << fd;
317  close(fd);
318  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
319  if (connected_fds_.count(source_rank()))
320  connected_fds_[source_rank()].erase(fd);
321  fd = -1;
322  TLOG(TLVL_DEBUG) << GetTraceName() << ": disconnect_receive_socket_: There are now " << connected_fds_[source_rank()].size() << " active senders.";
323  return fd;
324 }
325 
326 int artdaq::TCPSocketTransfer::receiveFragmentData(RawDataType* destination, size_t)
327 {
328  TLOG(9) << GetTraceName() << ": receiveFragmentData: BEGIN";
329  int ret_rank = RECV_TIMEOUT;
330  if (active_receive_fd_ == -1)
331  { // what if just listen_fd???
332  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Receive socket not connected, returning RECV_TIMEOUT";
333  return RECV_TIMEOUT;
334  }
335 
336  //void* buff=alloca(max_fragment_size_words_*8);
337  uint8_t* buff;
338  size_t byte_cnt = 0;
339  int sts;
340  int offset = 0;
341  SocketState state = SocketState::Metadata;
342  int target_bytes = sizeof(MessHead);
343 
344  pollfd pollfd_s;
345  pollfd_s.events = POLLIN | POLLPRI | POLLERR;
346  pollfd_s.fd = active_receive_fd_;
347 
348  bool done = false;
349  while (!done)
350  {
351  TLOG(9) << GetTraceName() << ": receiveFragmentData: Polling fd to see if there's data";
352  int num_fds_ready = poll(&pollfd_s, 1, 1000);
353  if (num_fds_ready <= 0)
354  {
355  if (num_fds_ready == 0)
356  {
357  TLOG(9) << GetTraceName() << ": receiveFragmentData: No data on receive socket, returning RECV_TIMEOUT";
358  active_receive_fd_ = -1;
359  return RECV_TIMEOUT;
360  }
361 
362  TLOG(TLVL_ERROR) << "Error in poll: errno=" << errno;
363  active_receive_fd_ = -1;
364  break;
365  }
366 
367  if (pollfd_s.revents & (POLLIN | POLLPRI))
368  {
369  // Expected, don't have to check revents any further
370  }
371  else if (pollfd_s.revents & (POLLNVAL))
372  {
373  disconnect_receive_socket_(pollfd_s.fd, "FD is closed, most likely because the peer went away.");
374  break;
375  }
376  else if (pollfd_s.revents & (POLLHUP | POLLERR))
377  {
378  disconnect_receive_socket_(pollfd_s.fd, "Poll returned POLLHUP or POLLERR, indicating problems with the sender.");
379  break;
380  }
381  else
382  {
383  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Wrong event received from pollfd: " << pollfd_s.revents;
384  disconnect_receive_socket_(pollfd_s.fd);
385  break;
386  }
387 
388  if (state == SocketState::Metadata)
389  {
390  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading Message Header" ;
391  buff = &(mha[offset]);
392  byte_cnt = sizeof(MessHead) - offset;
393  }
394  else
395  {
396  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading data" ;
397  buff = reinterpret_cast<uint8_t*>(destination) + offset;
398  byte_cnt = mh.byte_count - offset;
399  }
400 
401  TLOG(10) << GetTraceName() << ": receiveFragmentData: Reading " << byte_cnt << " bytes from socket into " << (void*)buff;
402  sts = read(active_receive_fd_, buff, byte_cnt);
403  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Done with read" ;
404 
405  TLOG(10) << GetTraceName() << ": recvFragment state=" << static_cast<int>(state) << " read=" << sts;
406  if (sts < 0)
407  {
408  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Error on receive, closing socket"
409  << " (errno=" << errno << ": " << strerror(errno) << ")";
410  disconnect_receive_socket_(pollfd_s.fd);
411  }
412  else
413  {
414  // see if we're done (with this state)
415  sts = offset += sts;
416  if (sts >= target_bytes)
417  {
418  TLOG(9) << GetTraceName() << ": receiveFragmentData: Target read bytes reached. Changing state";
419  offset = 0;
420  if (state == SocketState::Metadata)
421  {
422  state = SocketState::Data;
423  mh.byte_count = ntohl(mh.byte_count);
424  mh.source_id = ntohs(mh.source_id);
425  target_bytes = mh.byte_count;
426  }
427  else
428  {
429  ret_rank = source_rank();
430  TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
431  TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
432 
433 #if USE_ACKS
434  send_ack_(active_receive_fd_);
435 #endif
436 
437  done = true; // no more polls
438  break; // no more read of ready fds
439  }
440  }
441  }
442 
443  // Check if we were asked to do a 0-size receive
444  if (target_bytes == 0 && state == SocketState::Data)
445  {
446  ret_rank = source_rank();
447  TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
448  TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
449 
450 #if USE_ACKS
451  send_ack_(active_receive_fd_);
452 #endif
453 
454  done = true; // no more polls
455  }
456 
457  } // while(!done)...poll
458 
459  last_active_receive_fd_ = active_receive_fd_;
460  active_receive_fd_ = -1;
461 
462  TLOG(9) << GetTraceName() << ": receiveFragmentData: Returning " << ret_rank;
463  return ret_rank;
464 }
465 
467 {
468  switch (role())
469  {
471  return send_fd_ != -1;
473  TLOG(TLVL_DEBUG) << GetTraceName() << ": isRunning: There are " << getConnectedFDCount(source_rank()) << " fds connected.";
474  return getConnectedFDCount(source_rank()) > 0;
475  }
476  return false;
477 }
478 
479 // Send the given Fragment. Return the rank of the destination to which
480 // the Fragment was sent OR -1 if to none.
481 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendFragment_(Fragment&& frag, size_t send_timeout_usec)
482 {
483  TLOG(12) << GetTraceName() << ": sendFragment begin";
484  artdaq::Fragment grab_ownership_frag = std::move(frag);
485 
486  reconnect_();
487  // Send Fragment Header
488 
489  iovec iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress()),
490  detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
491 
492  auto sts = sendData_(&iov, 1, send_retry_timeout_us_);
493  auto start_time = std::chrono::steady_clock::now();
494  //If it takes more than 10 seconds to write a Fragment header, give up
495  while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
496  {
497  TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
498  sts = sendData_(&iov, 1, send_retry_timeout_us_);
499  usleep(1000);
500  }
501  if (sts != CopyStatus::kSuccess) return sts;
502 
503  // Send Fragment Data
504 
505  iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress() + detail::RawFragmentHeader::num_words()),
506  grab_ownership_frag.sizeBytes() - detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
507  sts = sendData_(&iov, 1, send_retry_timeout_us_);
508  start_time = std::chrono::steady_clock::now();
509  while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
510  {
511  TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
512  sts = sendData_(&iov, 1, send_retry_timeout_us_);
513  usleep(1000);
514  }
515 
516 #if USE_ACKS
517  receive_ack_(send_fd_);
518 #endif
519 
520  TLOG(12) << GetTraceName() << ": sendFragment returning kSuccess";
521  return sts;
522 }
523 
524 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const void* buf, size_t bytes, size_t send_timeout_usec)
525 {
526  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_ Converting buf to iovec";
527  iovec iov = { (void*)buf, bytes };
528  return sendData_(&iov, 1, send_timeout_usec);
529 }
530 
531 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const struct iovec* iov, int iovcnt, size_t send_timeout_usec)
532 {
533  // check all connected??? -- currently just check fd!=-1
534  if (send_fd_ == -1)
535  {
536  if (timeoutMessageArmed_)
537  {
538  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Send fd is not open. Returning kTimeout";
539  timeoutMessageArmed_ = false;
540  }
541  return CopyStatus::kTimeout;
542  }
543  timeoutMessageArmed_ = true;
544  TLOG(14) << GetTraceName() << ": send_timeout_usec is " << send_timeout_usec << ", currently unused.";
545 
546  //TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Determining write size" ;
547  uint32_t total_to_write_bytes = 0;
548  std::vector<iovec> iov_in(iovcnt + 1); // need contiguous (for the unlike case that only partial MH
549  std::vector<iovec> iovv(iovcnt + 2); // 1 more for mh and another one for any partial
550  int ii;
551  for (ii = 0; ii < iovcnt; ++ii)
552  {
553  iov_in[ii + 1] = iov[ii];
554  total_to_write_bytes += iov[ii].iov_len;
555  }
556  //TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Constructing Message Header" ;
557  MessHead mh = { 0,MessHead::data_v0,htons(source_rank()),{htonl(total_to_write_bytes)} };
558  iov_in[0].iov_base = &mh;
559  iov_in[0].iov_len = sizeof(mh);
560  total_to_write_bytes += sizeof(mh);
561 
562  ssize_t sts = 0;
563  ssize_t total_written_bytes = 0;
564  ssize_t per_write_max_bytes = (32 * 1024);
565 
566  size_t in_iov_idx = 0; // only increment this when we know the associated data has been xferred
567  size_t out_iov_idx = 0;
568  ssize_t this_write_bytes = 0;
569 
570  do
571  {
572  // The first out_iov may be set at the end of the previous loop.
573  // iov looping from below (b/c of the latter, we need to check this_write_bytes)
574  for (;
575  (in_iov_idx + out_iov_idx) < iov_in.size() && this_write_bytes < per_write_max_bytes;
576  ++out_iov_idx)
577  {
578  this_write_bytes += iov_in[in_iov_idx + out_iov_idx].iov_len;
579  iovv[out_iov_idx] = iov_in[in_iov_idx + out_iov_idx];
580  }
581  if (this_write_bytes > per_write_max_bytes)
582  {
583  iovv[out_iov_idx - 1].iov_len -= this_write_bytes - per_write_max_bytes;
584  this_write_bytes = per_write_max_bytes;
585  }
586 
587  // need to do blocking algorithm -- including throttled block notifications
588  do_again:
589 #ifndef __OPTIMIZE__ // This can be an expensive TRACE call (even if disabled) due to multiplicity of calls
590  TLOG(14) << GetTraceName() << ": sendFragment b4 writev " << std::setw(7) << total_written_bytes << " total_written_bytes send_fd_=" << send_fd_ << " in_idx=" << in_iov_idx
591  << " iovcnt=" << out_iov_idx << " 1st.len=" << iovv[0].iov_len;
592 #endif
593  //TLOG(TLVL_DEBUG) << GetTraceName() << " calling writev" ;
594  sts = writev(send_fd_, &(iovv[0]), out_iov_idx);
595  //TLOG(TLVL_DEBUG) << GetTraceName() << " done with writev" ;
596 
597  if (sts == -1)
598  {
599  if (errno == EAGAIN /* same as EWOULDBLOCK */)
600  {
601  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment EWOULDBLOCK";
602  fcntl(send_fd_, F_SETFL, 0); // clear O_NONBLOCK
603  blocking = true;
604  // NOTE: YES -- could drop here
605  goto do_again;
606  }
607  TLOG(TLVL_WARNING) << GetTraceName() << ": sendFragment_: WRITE ERROR: " << strerror(errno);
608  connect_state = 0; // any write error closes
609  close(send_fd_);
610  send_fd_ = -1;
612  }
613  else if (sts != this_write_bytes)
614  {
615  // we'll loop around -- with
616  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment writev sts(" << sts << ")!=requested_send_bytes(" << this_write_bytes << ")";
617  total_written_bytes += sts; // add sts to total_written_bytes now as sts is adjusted next
618  // find which iovs are done
619  for (ii = 0; (size_t)sts >= iovv[ii].iov_len; ++ii)
620  sts -= iovv[ii].iov_len;
621  in_iov_idx += ii; // done with these in_iovs
622  iovv[ii].iov_len -= sts; // adjust partial iov
623  iovv[ii].iov_base = (uint8_t*)(iovv[ii].iov_base) + sts; // adjust partial iov
624 
625  // add more to get up to per_write_max_bytes
626  out_iov_idx = 0;
627  if (ii != 0)
628  iovv[out_iov_idx] = iovv[ii];
629  // starting over
630  this_write_bytes = iovv[out_iov_idx].iov_len;
631  // add any left over from appropriate in_iov_idx --
632  // i.e. match this out_iov with the in_iov that was used to
633  // initialize it; see how close the out base+len is to in base+len
634  // check !>per_write_max_bytes
635  unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
636  - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
637  if (additional)
638  {
639  iovv[out_iov_idx].iov_len += additional;
640  this_write_bytes += additional;
641  if (this_write_bytes > per_write_max_bytes)
642  {
643  iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
644  this_write_bytes = per_write_max_bytes;
645  }
646  }
647  ++out_iov_idx; // done with
648  TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts!=: this_write_bytes=" << this_write_bytes
649  << " out_iov_idx=" << out_iov_idx
650  << " additional=" << additional
651  << " ii=" << ii;
652  }
653  else
654  {
655 #ifndef __OPTIMIZE__ // This can be an expensive TRACE call (even if disabled) due to multiplicity of calls
656  TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts(" << sts << ")==requested_send_bytes(" << this_write_bytes << ")";
657 #endif
658  total_written_bytes += sts;
659  --out_iov_idx; // make it the index of the last iovv
660  iovv[out_iov_idx].iov_base = (uint8_t*)(iovv[out_iov_idx].iov_base) + iovv[out_iov_idx].iov_len;
661  iovv[out_iov_idx].iov_len = 0;
662  in_iov_idx += out_iov_idx; // at least this many complete (one more if "last iovv" is complete
663  this_write_bytes = 0;
664  // need to check last iovv against appropriate iov_in
665  unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
666  - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
667  if (additional)
668  {
669  iovv[out_iov_idx].iov_len += additional;
670  this_write_bytes += additional;
671  if (this_write_bytes > per_write_max_bytes)
672  {
673  iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
674  this_write_bytes = per_write_max_bytes;
675  }
676  if (out_iov_idx != 0)
677  iovv[0] = iovv[out_iov_idx];
678  out_iov_idx = 1;
679  }
680  else
681  {
682  ++in_iov_idx;
683  out_iov_idx = 0;
684  }
685  }
686  } while (total_written_bytes < total_to_write_bytes);
687  if (total_written_bytes > total_to_write_bytes)
688  TLOG(TLVL_ERROR) << GetTraceName() << ": sendFragment program error: too many bytes transferred";
689 
690  if (blocking)
691  {
692  blocking = false;
693  fcntl(send_fd_, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
694  }
695  sts = total_written_bytes - sizeof(MessHead);
696 
697  TLOG(14) << GetTraceName() << ": sendFragment sts=" << sts;
699 }
700 
701 void artdaq::TCPSocketTransfer::connect_()
702 {
703  auto start_time = std::chrono::steady_clock::now();
704 
705  // Retry a few times if we can't connect
706  while (send_fd_ == -1 && TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_retry_timeout_us_ * 10)
707  {
708  TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting sender socket";
709  int sndbuf_bytes = static_cast<int>(sndbuf_);
710  send_fd_ = TCPConnect(hostMap_[destination_rank()].hostname.c_str()
711  , calculate_port_()
712  , O_NONBLOCK
713  , sndbuf_bytes);
714  if (send_fd_ == -1)
715  usleep(send_retry_timeout_us_);
716  }
717  connect_state = 0;
718  blocking = 0;
719  TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_ " + hostMap_[destination_rank()].hostname + ":" << calculate_port_() << " send_fd_=" << send_fd_;
720  if (send_fd_ != -1)
721  {
722  // write connect msg
723  TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_: Writing connect message";
724  MessHead mh = { 0,MessHead::connect_v0,htons(source_rank()),{htonl(CONN_MAGIC)} };
725  ssize_t sts = write(send_fd_, &mh, sizeof(mh));
726  if (sts == -1)
727  {
728  TLOG(TLVL_ERROR) << GetTraceName() << ": connect_: Error writing connect message!";
729  // a write error here is completely unexpected!
730  connect_state = 0;
731  close(send_fd_);
732  send_fd_ = -1;
733  }
734  else
735  {
736  TLOG(TLVL_INFO) << GetTraceName() << ": connect_: Successfully connected";
737  // consider it all connected/established
738  connect_state = 1;
739  }
740  }
741 }
742 
743 void artdaq::TCPSocketTransfer::reconnect_()
744 {
745  if (send_fd_ == -1 && role() == TransferInterface::Role::kSend)
746  {
747  TLOG(TLVL_TRACE) << GetTraceName() << ": check/reconnect";
748  return connect_();
749  }
750 }
751 
752 void artdaq::TCPSocketTransfer::start_listen_thread_()
753 {
754  std::unique_lock<std::mutex> start_lock(listen_thread_mutex_);
755  if (listen_thread_refcount_ == 0)
756  {
757  if (listen_thread_ && listen_thread_->joinable()) listen_thread_->join();
758  listen_thread_refcount_ = 1;
759  TLOG(TLVL_INFO) << GetTraceName() << ": Starting Listener Thread";
760  listen_thread_ = std::make_unique<boost::thread>(&TCPSocketTransfer::listen_, calculate_port_(), rcvbuf_);
761  }
762  else
763  {
764  listen_thread_refcount_++;
765  }
766 }
767 
768 #if USE_ACKS
769 void artdaq::TCPSocketTransfer::receive_ack_(int fd)
770 {
771  MessHead mh;
772  uint64_t mark_us = TimeUtils::gettimeofday_us();
773  fcntl(send_fd_, F_SETFL, 0); // clear O_NONBLOCK
774  auto sts = read(fd, &mh, sizeof(mh));
775  fcntl(send_fd_, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
776  uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us;
777  TLOG(17) << GetTraceName() << ": receive_ack_: Read of ack message took " << delta_us << " microseconds.";
778  if (sts != sizeof(mh))
779  {
780  TLOG(TLVL_ERROR) << GetTraceName() << ": receive_ack_: Wrong message header length received! (actual " << sts << " != " << sizeof(mh) << " expected)";
781  close(fd);
782  send_fd_ = -1;
783  return;
784  }
785 
786  // check for "magic" and valid source_id(aka rank)
787  mh.source_id = ntohs(mh.source_id); // convert here as it is reference several times
788  if (mh.source_id != my_rank)
789  {
790  TLOG(TLVL_ERROR) << GetTraceName() << ": receive_ack_: Received ack for different sender! Rank=" << my_rank << ", hdr=" << mh.source_id;
791  close(fd);
792  send_fd_ = -1;
793  return;
794  }
795  if (ntohl(mh.conn_magic) != ACK_MAGIC || !(mh.message_type == MessHead::ack_v0)) // Allow for future connect message versions
796  {
797  TLOG(TLVL_ERROR) << GetTraceName() << ": receive_ack_: Wrong magic bytes in header!";
798  close(fd);
799  send_fd_ = -1;
800  return;
801  }
802 }
803 
804 void artdaq::TCPSocketTransfer::send_ack_(int fd)
805 {
806  MessHead mh = { 0,MessHead::ack_v0,htons(source_rank()),{ htonl(ACK_MAGIC) } };
807  write(fd, &mh, sizeof(mh));
808 }
809 #endif
810 
811 void artdaq::TCPSocketTransfer::listen_(int port, size_t rcvbuf)
812 {
813  int listen_fd = -1;
814  while (listen_thread_refcount_ > 0)
815  {
816  TLOG(TLVL_TRACE) << "listen_: Listening/accepting new connections";
817  if (listen_fd == -1)
818  {
819  TLOG(TLVL_DEBUG) << "listen_: Opening listener";
820  listen_fd = TCP_listen_fd(port, rcvbuf);
821  }
822  if (listen_fd == -1)
823  {
824  TLOG(TLVL_DEBUG) << "listen_: Error creating listen_fd!";
825  break;
826  }
827 
828  int res;
829  timeval tv = { 2,0 }; // maybe increase of some global "debugging" flag set???
830  fd_set rfds;
831  FD_ZERO(&rfds);
832  FD_SET(listen_fd, &rfds);
833 
834  res = select(listen_fd + 1, &rfds, (fd_set *)0, (fd_set *)0, &tv);
835  if (res > 0)
836  {
837  int sts;
838  sockaddr_un un;
839  socklen_t arglen = sizeof(un);
840  int fd;
841  TLOG(TLVL_DEBUG) << "listen_: Calling accept";
842  fd = accept(listen_fd, (sockaddr *)&un, &arglen);
843  TLOG(TLVL_DEBUG) << "listen_: Done with accept";
844 
845  TLOG(TLVL_DEBUG) << "listen_: Reading connect message";
846  socklen_t lenlen = sizeof(tv);
847  /*sts=*/
848  setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, lenlen); // see man 7 socket.
849  MessHead mh;
850  uint64_t mark_us = TimeUtils::gettimeofday_us();
851  sts = read(fd, &mh, sizeof(mh));
852  uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us;
853  TLOG(TLVL_DEBUG) << "listen_: Read of connect message took " << delta_us << " microseconds.";
854  if (sts != sizeof(mh))
855  {
856  TLOG(TLVL_DEBUG) << "listen_: Wrong message header length received!";
857  close(fd);
858  continue;
859  }
860 
861  // check for "magic" and valid source_id(aka rank)
862  mh.source_id = ntohs(mh.source_id); // convert here as it is reference several times
863  if (ntohl(mh.conn_magic) != CONN_MAGIC || !(mh.message_type == MessHead::connect_v0)) // Allow for future connect message versions
864  {
865  TLOG(TLVL_DEBUG) << "listen_: Wrong magic bytes in header!";
866  close(fd);
867  continue;
868  }
869 
870  // now add (new) connection
871  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
872  connected_fds_[mh.source_id].insert(fd);
873 
874  TLOG(TLVL_INFO) << "listen_: New fd is " << fd << " for source rank " << mh.source_id;
875  }
876  else
877  {
878  TLOG(16) << "listen_: No connections in timeout interval!";
879  }
880  }
881 
882  TLOG(TLVL_INFO) << "listen_: Shutting down connection listener";
883  if (listen_fd != -1) close(listen_fd);
884  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
885  auto it = connected_fds_.begin();
886  while (it != connected_fds_.end())
887  {
888  auto& fd_set = it->second;
889  auto rank_it = fd_set.begin();
890  while (rank_it != fd_set.end())
891  {
892  close(*rank_it);
893  rank_it = fd_set.erase(rank_it);
894  }
895  it = connected_fds_.erase(it);
896  }
897 
898 } // do_connect_
899 
// NOTE(review): presumably exposes artdaq::TCPSocketTransfer to the transfer-plugin
// loader; the macro is defined outside this file — confirm against its definition.
900 DEFINE_ARTDAQ_TRANSFER(artdaq::TCPSocketTransfer)
bool isRunning() override
Determine whether the TransferInterface plugin is able to send/receive data.
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
int TCPConnect(char const *host_in, int dflt_port, long flags=0, int sndbufsiz=0)
Connect to a host on a given port.
Definition: TCPConnect.cc:356
uint32_t conn_magic
unsigned first is better for MessHead initializer: {0,0,my_node_idx_,CONN_MAGIC}
Definition: SRSockets.hh:39
This TransferInterface is a Receiver.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
TCPSocketTransfer(fhicl::ParameterSet const &ps, Role role)
TCPSocketTransfer Constructor.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
This TransferInterface is a Sender.
int32_t byte_count
use CONN_MAGIC for connect_v0, data that follow for data_v0 (and 0 length data)
Definition: SRSockets.hh:40
Some error occurred, but no exception was thrown.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
int64_t source_id
Rank of the source.
Definition: SRSockets.hh:35
MessType message_type
Message Type.
Definition: SRSockets.hh:34
The send operation completed successfully.
hostMap_t MakeHostMap(fhicl::ParameterSet pset, int masterPortOffset=0, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
Definition: HostMap.hh:77
This interface defines the functions used to transfer data between artdaq applications.
TransferInterface implementation plugin that sends data using TCP sockets.
This header is sent by the TCPSocket_transfer to allow for more efficient writev calls.
Definition: SRSockets.hh:15
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.