artdaq  v3_02_00
TCPSocket_transfer.cc
1 // Sep 14, 2016. "TERMS AND CONDITIONS" governing this file are in the README
2 // or COPYING file. If you do not have such a file, one can be obtained by
3 // contacting Ron or Fermi Lab in Batavia IL, 60510, phone: 630-840-3000.
4 // $RCSfile: .emacs.gnu,v $
5 // rev="$Revision: 1.30 $$Date: 2016/03/01 14:27:27 $";
6 
7 // C Includes
8 #include <stdlib.h> // atoi, strtoul
9 #include <sys/socket.h> // socket, socklen_t
10 #include <sys/un.h> // sockaddr_un
11 #include <arpa/inet.h> // ntohl, ntohs
12 #include <sys/types.h> // size_t
13 #include <poll.h> // struct pollfd
14 
15 // C++ Includes
16 #include <string>
17 #include <fstream>
18 #include <stdexcept>
19 
20 // product Includes
21 #define TRACE_NAME "TCPSocketTransfer"
22 #include "artdaq/DAQdata/Globals.hh"
23 
24 // artdaq Includes
25 #include "artdaq/TransferPlugins/TCPSocketTransfer.hh"
28 #include "artdaq/TransferPlugins/detail/Timeout.hh"
29 #include "artdaq/TransferPlugins/detail/SRSockets.hh"
30 #include "artdaq-core/Data/Fragment.hh"
31 #include "artdaq-core/Utilities/TimeUtils.hh"
32 #include <iomanip>
33 
34 std::atomic<int> artdaq::TCPSocketTransfer::listen_thread_refcount_(0);
35 std::unique_ptr<boost::thread> artdaq::TCPSocketTransfer::listen_thread_ = nullptr;
36 std::map<int, std::set<int>> artdaq::TCPSocketTransfer::connected_fds_ = std::map<int, std::set<int>>();
37 std::mutex artdaq::TCPSocketTransfer::listen_thread_mutex_;
38 std::mutex artdaq::TCPSocketTransfer::connected_fd_mutex_;
39 
41 TCPSocketTransfer(fhicl::ParameterSet const& pset, TransferInterface::Role role)
42  : TransferInterface(pset, role)
43  , send_fd_(-1)
44  , active_receive_fd_(-1)
45  , last_active_receive_fd_(-1)
46  , rcvbuf_(pset.get<size_t>("tcp_receive_buffer_size", 0))
47  , sndbuf_(max_fragment_size_words_ * sizeof(artdaq::RawDataType) * buffer_count_)
48  , send_retry_timeout_us_(pset.get<size_t>("send_retry_timeout_us", 1000000))
49  , stats_connect_stop_(false)
50  , stats_connect_thread_(std::bind(&TCPSocketTransfer::stats_connect_, this))
51  , timeoutMessageArmed_(true)
52  , not_connected_count_(0)
53  , receive_err_threshold_(pset.get<size_t>("receive_socket_disconnected_max_count", 1000))
54  , receive_err_wait_us_(pset.get<size_t>("receive_socket_disconnected_wait_us", 10000))
55 {
56  TLOG(TLVL_DEBUG) << GetTraceName() << " Constructor: pset=" << pset.to_string() << ", role=" << (role == TransferInterface::Role::kReceive ? "kReceive" : "kSend");
57  auto masterPortOffset = pset.get<int>("offset_all_ports", 0);
58  hostMap_ = MakeHostMap(pset, masterPortOffset);
59 
60  std::function<void()> function = std::bind(&TCPSocketTransfer::reconnect_, this);
61  tmo_.add_periodic("reconnect", NULL, function, 200/*millisec*/);
62 
64  {
65  // Wait for sender to connect...
66  TLOG(TLVL_DEBUG) << GetTraceName() << ": Listening for connections";
67  start_listen_thread_();
68  TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Listening";
69  }
70  else
71  {
72  TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting to destination";
73  connect_();
74  TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Connecting";
75  }
76  TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Constructor";
77 }
78 
79 artdaq::TCPSocketTransfer::~TCPSocketTransfer() noexcept
80 {
81  TLOG(TLVL_DEBUG) << GetTraceName() << ": Shutting down TCPSocketTransfer";
82  stats_connect_stop_ = true;
83  stopstatscv_.notify_all();
84  stats_connect_thread_.join();
85 
86  if (role() == TransferInterface::Role::kSend)
87  {
88  // close all open connections (send stop_v0) first
89  MessHead mh = { 0,MessHead::stop_v0,htons(TransferInterface::source_rank()),{0} };
90  if (send_fd_ != -1)
91  {
92  // should be blocking with modest timeo
93  timeval tv = { 0,100000 };
94  socklen_t len = sizeof(tv);
95  setsockopt(send_fd_, SOL_SOCKET, SO_SNDTIMEO, &tv, len);
96  write(send_fd_, &mh, sizeof(mh));
97  }
98  close(send_fd_);
99  }
100  else
101  {
102  {
103  std::unique_lock<std::mutex> fd_lock(connected_fd_mutex_);
104  auto it = connected_fds_[source_rank()].begin();
105  while (it != connected_fds_[source_rank()].end())
106  {
107  close(*it);
108  it = connected_fds_[source_rank()].erase(it);
109  }
110  connected_fds_.erase(source_rank());
111  }
112 
113  std::unique_lock<std::mutex> lk(listen_thread_mutex_);
114  listen_thread_refcount_--;
115  if (listen_thread_refcount_ == 0 && listen_thread_ && listen_thread_->joinable())
116  {
117  listen_thread_->join();
118  }
119  }
120  TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Destructor";
121 }
122 
123 int artdaq::TCPSocketTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t timeout_usec)
124 {
125  TLOG(5) << GetTraceName() << ": receiveFragmentHeader: BEGIN";
126  int ret_rank = RECV_TIMEOUT;
127 
128  if (getConnectedFDCount(source_rank()) == 0)
129  { // what if just listen_fd???
130  if (++not_connected_count_ > receive_err_threshold_) { return DATA_END; }
131  TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Receive socket not connected, returning RECV_TIMEOUT";
132  usleep(receive_err_wait_us_);
133  return RECV_TIMEOUT;
134  }
135  not_connected_count_ = 0;
136 
137  TLOG(5) << GetTraceName() << ": receiveFragmentHeader timeout_usec=" << std::to_string(timeout_usec);
138  //void* buff=alloca(max_fragment_size_words_*8);
139  size_t byte_cnt = 0;
140  int sts;
141  int offset = 0;
142  SocketState state = SocketState::Metadata;
143  int target_bytes = sizeof(MessHead);
144  uint64_t start_time_us = TimeUtils::gettimeofday_us();
145 
146  //while (active_receive_fd_ != -1)
147  //{
148  // TLOG(TLVL_TRACE) << GetTraceName() << ": Currently receiving from fd " << active_receive_fd_ << ", waiting!";
149  // usleep(1000);
150  //}
151 
152 
153  uint8_t* buff;
154 
155  int timeout_ms;
156  if (timeout_usec == 0)
157  timeout_ms = 0;
158  else
159  timeout_ms = (timeout_usec + 999) / 1000; // want at least 1 ms
160 
161  bool done = false;
162  while (!done && getConnectedFDCount(source_rank()) > 0)
163  {
164  if (active_receive_fd_ == -1)
165  {
166  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
167  size_t fd_count = connected_fds_[source_rank()].size();
168  auto iter = connected_fds_[source_rank()].begin();
169  std::vector<pollfd> pollfds(fd_count);
170  for (size_t ii = 0; ii < fd_count; ++ii)
171  {
172  pollfds[ii].events = POLLIN | POLLERR;
173  pollfds[ii].fd = *iter;
174  ++iter;
175  }
176 
177  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragment: Polling fd to see if there's data" ;
178  int num_fds_ready = poll(&pollfds[0], fd_count, timeout_ms);
179  if (num_fds_ready <= 0)
180  {
181  if (num_fds_ready == 0 && timeout_ms > 0)
182  {
183  TLOG(5) << GetTraceName() << ": receiveFragmentHeader: No data on receive socket, returning RECV_TIMEOUT";
184  return RECV_TIMEOUT;
185  }
186  break;
187  }
188 
189  size_t index = 0;
190  if (last_active_receive_fd_ != -1)
191  {
192  for (auto& pollfd : pollfds)
193  {
194  index++;
195  if (pollfd.fd == last_active_receive_fd_)
196  {
197  break;
198  }
199  }
200  }
201 
202  int active_index = -1;
203  short anomolous_events = 0;
204  for (size_t ii = index; ii < index + pollfds.size(); ++ii)
205  {
206  if (pollfds[index % pollfds.size()].revents & (POLLIN | POLLPRI | POLLHUP | POLLERR))
207  {
208  active_index = index % pollfds.size();
209  active_receive_fd_ = pollfds[active_index].fd;
210  break;
211  }
212  else if (pollfds[index % pollfds.size()].revents & (POLLNVAL))
213  {
214  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: FD is closed, most likely because the peer went away. Removing from fd list.";
215  close(pollfds[index].fd);
216  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
217  connected_fds_[source_rank()].erase(pollfds[index].fd);
218  continue;
219  }
220  else if (pollfds[index % pollfds.size()].revents)
221  {
222  anomolous_events |= pollfds[index % pollfds.size()].revents;
223  }
224  }
225 
226  if (active_index == -1)
227  {
228  if (anomolous_events)
229  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Wrong event received from a pollfd. Mask: " << static_cast<int>(anomolous_events);
230  active_receive_fd_ = -1;
231  continue;
232  }
233 
234  if (!done && timeout_usec > 0)
235  {
236  // calc next timeout_ms (unless timed out)
237  size_t delta_us = TimeUtils::gettimeofday_us() - start_time_us;
238  if (delta_us > timeout_usec)
239  {
240  return RECV_TIMEOUT;
241  }
242  timeout_ms = ((timeout_usec - delta_us) + 999) / 1000; // want at least 1 ms
243  }
244  }
245 
246  if (state == SocketState::Metadata)
247  {
248  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Reading Message Header" ;
249  buff = &(mha[offset]);
250  byte_cnt = sizeof(MessHead) - offset;
251  }
252  else
253  {
254  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Reading data" ;
255  buff = reinterpret_cast<uint8_t*>(&header) + offset;
256  byte_cnt = mh.byte_count - offset;
257  }
258 
259  if (byte_cnt > 0)
260  {
261  TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Reading " << byte_cnt << " bytes from socket";
262  sts = read(active_receive_fd_, buff, byte_cnt);
263  TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Done with read";
264  }
265 
266  TLOG(7) << GetTraceName() << ": receiveFragmentHeader state=" << static_cast<int>(state) << " read=" << sts;
267  if (sts < 0)
268  {
269  TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: Error on receive, closing socket " << " (errno=" << errno << ": " << strerror(errno) << ")";
270  close(active_receive_fd_);
271  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
272  connected_fds_[source_rank()].erase(active_receive_fd_);
273  active_receive_fd_ = -1;
274  }
275  else
276  {
277  // see if we're done (with this state)
278  sts = offset += sts;
279  if (sts >= target_bytes)
280  {
281  TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Target read bytes reached. Changing state";
282  offset = 0;
283  if (state == SocketState::Metadata)
284  {
285  state = SocketState::Data;
286  mh.byte_count = ntohl(mh.byte_count);
287  mh.source_id = ntohs(mh.source_id);
288  target_bytes = mh.byte_count;
289 
290  if (mh.message_type == MessHead::stop_v0)
291  {
292  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Stop Message received. Closing socket " << active_receive_fd_;
293  close(active_receive_fd_);
294  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
295  connected_fds_[source_rank()].erase(active_receive_fd_);
296  active_receive_fd_ = -1;
297  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: There are now " << connected_fds_[source_rank()].size() << " active senders.";
298  }
299 
300  if (target_bytes == 0)
301  {
302  //Probably a stop_v0, return timeout so we can try again.
303  return RECV_TIMEOUT;
304  }
305  }
306  else
307  {
308  ret_rank = source_rank();
309  TLOG(8) << GetTraceName() << ": receiveFragmentHeader done sts=" << sts << " src=" << ret_rank;
310  TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Done receiving fragment header. Moving into output.";
311 
312  done = true; // no more polls
313  break; // no more read of ready fds
314  }
315  }
316  }
317 
318  } // while(!done)...poll
319 
320  TLOG(5) << GetTraceName() << ": receiveFragmentHeader: Returning " << ret_rank;
321  return ret_rank;
322 }
323 
324 int artdaq::TCPSocketTransfer::receiveFragmentData(RawDataType* destination, size_t)
325 {
326  TLOG(9) << GetTraceName() << ": receiveFragmentData: BEGIN";
327  int ret_rank = RECV_TIMEOUT;
328  if (active_receive_fd_ == -1)
329  { // what if just listen_fd???
330  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Receive socket not connected, returning RECV_TIMEOUT";
331  return RECV_TIMEOUT;
332  }
333 
334  //void* buff=alloca(max_fragment_size_words_*8);
335  uint8_t* buff;
336  size_t byte_cnt = 0;
337  int sts;
338  int offset = 0;
339  SocketState state = SocketState::Metadata;
340  int target_bytes = sizeof(MessHead);
341 
342  pollfd pollfd_s;
343  pollfd_s.events = POLLIN | POLLPRI | POLLERR;
344  pollfd_s.fd = active_receive_fd_;
345 
346  bool done = false;
347  while (!done)
348  {
349  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Polling fd to see if there's data";
350  int num_fds_ready = poll(&pollfd_s, 1, 1000);
351  if (num_fds_ready <= 0)
352  {
353  if (num_fds_ready == 0)
354  {
355  TLOG(9) << GetTraceName() << ": receiveFragmentData: No data on receive socket, returning RECV_TIMEOUT";
356  active_receive_fd_ = -1;
357  return RECV_TIMEOUT;
358  }
359 
360  TLOG(TLVL_ERROR) << "Error in poll: errno=" << errno;
361  active_receive_fd_ = -1;
362  break;
363  }
364 
365  if (pollfd_s.revents & (POLLNVAL))
366  {
367  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: FD is closed, most likely because the peer went away. Removing from fd list.";
368  close(active_receive_fd_);
369  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
370  connected_fds_[source_rank()].erase(active_receive_fd_);
371  active_receive_fd_ = -1;
372  break;
373  }
374  else if (!(pollfd_s.revents & (POLLIN | POLLPRI | POLLERR)))
375  {
376  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Wrong event received from pollfd: " << pollfd_s.revents;
377  close(active_receive_fd_);
378  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
379  connected_fds_[source_rank()].erase(active_receive_fd_);
380  continue;
381  }
382 
383  if (state == SocketState::Metadata)
384  {
385  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading Message Header" ;
386  buff = &(mha[offset]);
387  byte_cnt = sizeof(MessHead) - offset;
388  }
389  else
390  {
391  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading data" ;
392  buff = reinterpret_cast<uint8_t*>(destination) + offset;
393  byte_cnt = mh.byte_count - offset;
394  }
395 
396  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading " << byte_cnt << " bytes from socket" ;
397  sts = read(active_receive_fd_, buff, byte_cnt);
398  //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Done with read" ;
399 
400  TLOG(10) << GetTraceName() << ": recvFragment state=" << static_cast<int>(state) << " read=" << sts;
401  if (sts < 0)
402  {
403  TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Error on receive, closing socket"
404  << " (errno=" << errno << ": " << strerror(errno) << ")";
405  close(active_receive_fd_);
406  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
407  connected_fds_[source_rank()].erase(active_receive_fd_);
408  active_receive_fd_ = -1;
409  }
410  else
411  {
412  // see if we're done (with this state)
413  sts = offset += sts;
414  if (sts >= target_bytes)
415  {
416  TLOG(9) << GetTraceName() << ": receiveFragmentData: Target read bytes reached. Changing state";
417  offset = 0;
418  if (state == SocketState::Metadata)
419  {
420  state = SocketState::Data;
421  mh.byte_count = ntohl(mh.byte_count);
422  mh.source_id = ntohs(mh.source_id);
423  target_bytes = mh.byte_count;
424  }
425  else
426  {
427  ret_rank = source_rank();
428  TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
429  TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
430 
431  done = true; // no more polls
432  break; // no more read of ready fds
433  }
434  }
435  }
436 
437  // Check if we were asked to do a 0-size receive
438  if (target_bytes == 0 && state == SocketState::Data)
439  {
440  ret_rank = source_rank();
441  TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
442  TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
443 
444  done = true; // no more polls
445  }
446 
447  } // while(!done)...poll
448 
449  last_active_receive_fd_ = active_receive_fd_;
450  active_receive_fd_ = -1;
451 
452  TLOG(9) << GetTraceName() << ": receiveFragmentData: Returning " << ret_rank;
453  return ret_rank;
454 }
455 
457 {
458  switch (role())
459  {
461  return send_fd_ != -1;
463  TLOG(TLVL_DEBUG) << GetTraceName() << ": isRunning: There are " << getConnectedFDCount(source_rank()) << " fds connected.";
464  return getConnectedFDCount(source_rank()) > 0;
465  }
466  return false;
467 }
468 
469 // Send the given Fragment. Return the rank of the destination to which
470 // the Fragment was sent OR -1 if to none.
471 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendFragment_(Fragment&& frag, size_t send_timeout_usec)
472 {
473  TLOG(12) << GetTraceName() << ": sendFragment begin";
474  artdaq::Fragment grab_ownership_frag = std::move(frag);
475 
476  // Send Fragment Header
477 
478  iovec iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress()),
479  detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
480 
481  auto sts = sendData_(&iov, 1, send_retry_timeout_us_);
482  auto start_time = std::chrono::steady_clock::now();
483  //If it takes more than 10 seconds to write a Fragment header, give up
484  while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
485  {
486  TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
487  sts = sendData_(&iov, 1, send_retry_timeout_us_);
488  usleep(1000);
489  }
490  if (sts != CopyStatus::kSuccess) return sts;
491 
492  // Send Fragment Data
493 
494  iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress() + detail::RawFragmentHeader::num_words()),
495  grab_ownership_frag.sizeBytes() - detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
496  sts = sendData_(&iov, 1, send_retry_timeout_us_);
497  start_time = std::chrono::steady_clock::now();
498  while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
499  {
500  TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
501  sts = sendData_(&iov, 1, send_retry_timeout_us_);
502  usleep(1000);
503  }
504 
505  TLOG(12) << GetTraceName() << ": sendFragment returning kSuccess";
506  return sts;
507 }
508 
509 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const void* buf, size_t bytes, size_t send_timeout_usec)
510 {
511  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_ Converting buf to iovec";
512  iovec iov = { (void*)buf, bytes };
513  return sendData_(&iov, 1, send_timeout_usec);
514 }
515 
516 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const struct iovec* iov, int iovcnt, size_t send_timeout_usec)
517 {
518  // check all connected??? -- currently just check fd!=-1
519  if (send_fd_ == -1)
520  {
521  if (timeoutMessageArmed_)
522  {
523  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Send fd is not open. Returning kTimeout";
524  timeoutMessageArmed_ = false;
525  }
526  return CopyStatus::kTimeout;
527  }
528  timeoutMessageArmed_ = true;
529  TLOG(14) << GetTraceName() << ": send_timeout_usec is " << std::to_string(send_timeout_usec) << ", currently unused.";
530 
531  //TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Determining write size" ;
532  uint32_t total_to_write_bytes = 0;
533  std::vector<iovec> iov_in(iovcnt + 1); // need contiguous (for the unlike case that only partial MH
534  std::vector<iovec> iovv(iovcnt + 2); // 1 more for mh and another one for any partial
535  int ii;
536  for (ii = 0; ii < iovcnt; ++ii)
537  {
538  iov_in[ii + 1] = iov[ii];
539  total_to_write_bytes += iov[ii].iov_len;
540  }
541  //TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Constructing Message Header" ;
542  MessHead mh = { 0,MessHead::data_v0,htons(source_rank()),{htonl(total_to_write_bytes)} };
543  iov_in[0].iov_base = &mh;
544  iov_in[0].iov_len = sizeof(mh);
545  total_to_write_bytes += sizeof(mh);
546 
547  ssize_t sts = 0;
548  ssize_t total_written_bytes = 0;
549  ssize_t per_write_max_bytes = (32 * 1024);
550 
551  size_t in_iov_idx = 0; // only increment this when we know the associated data has been xferred
552  size_t out_iov_idx = 0;
553  ssize_t this_write_bytes = 0;
554 
555  do
556  {
557  // The first out_iov may be set at the end of the previous loop.
558  // iov looping from below (b/c of the latter, we need to check this_write_bytes)
559  for (;
560  (in_iov_idx + out_iov_idx) < iov_in.size() && this_write_bytes < per_write_max_bytes;
561  ++out_iov_idx)
562  {
563  this_write_bytes += iov_in[in_iov_idx + out_iov_idx].iov_len;
564  iovv[out_iov_idx] = iov_in[in_iov_idx + out_iov_idx];
565  }
566  if (this_write_bytes > per_write_max_bytes)
567  {
568  iovv[out_iov_idx - 1].iov_len -= this_write_bytes - per_write_max_bytes;
569  this_write_bytes = per_write_max_bytes;
570  }
571 
572  // need to do blocking algorithm -- including throttled block notifications
573  do_again:
574  TLOG(14) << GetTraceName() << ": sendFragment b4 writev " << std::setw(7) << std::to_string(total_written_bytes) << " total_written_bytes send_fd_=" << send_fd_ << " in_idx=" << std::to_string(in_iov_idx)
575  << " iovcnt=" << std::to_string(out_iov_idx) << " 1st.len=" << std::to_string(iovv[0].iov_len);
576  //TLOG(TLVL_DEBUG) << GetTraceName() << " calling writev" ;
577  sts = writev(send_fd_, &(iovv[0]), out_iov_idx);
578  //TLOG(TLVL_DEBUG) << GetTraceName() << " done with writev" ;
579 
580  if (sts == -1)
581  {
582  if (errno == EAGAIN /* same as EWOULDBLOCK */)
583  {
584  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment EWOULDBLOCK";
585  fcntl(send_fd_, F_SETFL, 0); // clear O_NONBLOCK
586  blocking = true;
587  // NOTE: YES -- could drop here
588  goto do_again;
589  }
590  TLOG(TLVL_WARNING) << GetTraceName() << ": sendFragment_: WRITE ERROR: " << strerror(errno);
591  connect_state = 0; // any write error closes
592  close(send_fd_);
593  send_fd_ = -1;
595  }
596  else if (sts != this_write_bytes)
597  {
598  // we'll loop around -- with
599  TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment writev sts(" << std::to_string(sts) << ")!=requested_send_bytes(" << std::to_string(this_write_bytes) << ")";
600  total_written_bytes += sts; // add sts to total_written_bytes now as sts is adjusted next
601  // find which iovs are done
602  for (ii = 0; (size_t)sts >= iovv[ii].iov_len; ++ii)
603  sts -= iovv[ii].iov_len;
604  in_iov_idx += ii; // done with these in_iovs
605  iovv[ii].iov_len -= sts; // adjust partial iov
606  iovv[ii].iov_base = (uint8_t*)(iovv[ii].iov_base) + sts; // adjust partial iov
607 
608  // add more to get up to per_write_max_bytes
609  out_iov_idx = 0;
610  if (ii != 0)
611  iovv[out_iov_idx] = iovv[ii];
612  // starting over
613  this_write_bytes = iovv[out_iov_idx].iov_len;
614  // add any left over from appropriate in_iov_idx --
615  // i.e. match this out_iov with the in_iov that was used to
616  // initialize it; see how close the out base+len is to in base+len
617  // check !>per_write_max_bytes
618  unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
619  - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
620  if (additional)
621  {
622  iovv[out_iov_idx].iov_len += additional;
623  this_write_bytes += additional;
624  if (this_write_bytes > per_write_max_bytes)
625  {
626  iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
627  this_write_bytes = per_write_max_bytes;
628  }
629  }
630  ++out_iov_idx; // done with
631  TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts!=: this_write_bytes=" << std::to_string(this_write_bytes)
632  << " out_iov_idx=" << std::to_string(out_iov_idx)
633  << " additional=" << std::to_string(additional)
634  << " ii=" << ii;
635  }
636  else
637  {
638  TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts(" << std::to_string(sts) << ")==requested_send_bytes(" << std::to_string(this_write_bytes) << ")";
639  total_written_bytes += sts;
640  --out_iov_idx; // make it the index of the last iovv
641  iovv[out_iov_idx].iov_base = (uint8_t*)(iovv[out_iov_idx].iov_base) + iovv[out_iov_idx].iov_len;
642  iovv[out_iov_idx].iov_len = 0;
643  in_iov_idx += out_iov_idx; // at least this many complete (one more if "last iovv" is complete
644  this_write_bytes = 0;
645  // need to check last iovv against appropriate iov_in
646  unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
647  - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
648  if (additional)
649  {
650  iovv[out_iov_idx].iov_len += additional;
651  this_write_bytes += additional;
652  if (this_write_bytes > per_write_max_bytes)
653  {
654  iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
655  this_write_bytes = per_write_max_bytes;
656  }
657  if (out_iov_idx != 0)
658  iovv[0] = iovv[out_iov_idx];
659  out_iov_idx = 1;
660  }
661  else
662  {
663  ++in_iov_idx;
664  out_iov_idx = 0;
665  }
666  }
667  } while (total_written_bytes < total_to_write_bytes);
668  if (total_written_bytes > total_to_write_bytes)
669  TLOG(TLVL_ERROR) << GetTraceName() << ": sendFragment program error: too many bytes transferred";
670 
671  if (blocking)
672  {
673  blocking = false;
674  fcntl(send_fd_, F_SETFL, 0); // clear O_NONBLOCK
675  }
676  sts = total_written_bytes - sizeof(MessHead);
677 
678  TLOG(14) << GetTraceName() << ": sendFragment sts=" << std::to_string(sts);
680 }
681 
682 //=============================================
683 
684 void artdaq::TCPSocketTransfer::stats_connect_() // thread
685 {
686  std::cv_status sts;
687  while (!stats_connect_stop_)
688  {
689  std::string desc;
690  void* tag;
691  std::function<void()> function;
692  uint64_t ts_us;
693 
694  int msdly = tmo_.get_next_timeout_msdly();
695 
696  if (msdly <= 0)
697  msdly = 2000;
698 
699  std::unique_lock<std::mutex> lck(stopstatscvm_);
700  sts = stopstatscv_.wait_until(lck
701  , std::chrono::system_clock::now()
702  + std::chrono::milliseconds(msdly));
703  TLOG(15) << GetTraceName() << ": thread1 after wait_until(msdly=" << msdly << ") - sts=" << static_cast<int>(sts);
704 
705  if (sts == std::cv_status::no_timeout)
706  break;
707 
708  auto sts = tmo_.get_next_expired_timeout(desc, &tag, function, &ts_us);
709 
710  while (sts != -1 && desc != "")
711  {
712  if (function != NULL)
713  function();
714 
715  sts = tmo_.get_next_expired_timeout(desc, &tag, function, &ts_us);
716  }
717  }
718 }
719 
720 void artdaq::TCPSocketTransfer::connect_()
721 {
722  TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting sender socket";
723  int sndbuf_bytes = static_cast<int>(sndbuf_);
724  send_fd_ = TCPConnect(hostMap_[destination_rank()].hostname.c_str()
725  , calculate_port_()
726  , O_NONBLOCK
727  , sndbuf_bytes);
728  connect_state = 0;
729  blocking = 0;
730  TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_ " + hostMap_[destination_rank()].hostname + ":" << calculate_port_() << " send_fd_=" << send_fd_;
731  if (send_fd_ != -1)
732  {
733  // write connect msg
734  TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_: Writing connect message";
735  MessHead mh = { 0,MessHead::connect_v0,htons(source_rank()),{htonl(CONN_MAGIC)} };
736  ssize_t sts = write(send_fd_, &mh, sizeof(mh));
737  if (sts == -1)
738  {
739  TLOG(TLVL_ERROR) << GetTraceName() << ": connect_: Error writing connect message!";
740  // a write error here is completely unexpected!
741  connect_state = 0;
742  close(send_fd_);
743  send_fd_ = -1;
744  }
745  else
746  {
747  TLOG(TLVL_INFO) << GetTraceName() << ": connect_: Successfully connected";
748  // consider it all connected/established
749  connect_state = 1;
750  }
751  }
752 }
753 
754 void artdaq::TCPSocketTransfer::reconnect_()
755 {
756  TLOG(TLVL_TRACE) << GetTraceName() << ": check/reconnect";
757  if (send_fd_ == -1 && role() == TransferInterface::Role::kSend) return connect_();
758 }
759 
760 void artdaq::TCPSocketTransfer::start_listen_thread_()
761 {
762  std::unique_lock<std::mutex> start_lock(listen_thread_mutex_);
763  if (listen_thread_refcount_ == 0)
764  {
765  if (listen_thread_ && listen_thread_->joinable()) listen_thread_->join();
766  listen_thread_refcount_ = 1;
767  TLOG(TLVL_INFO) << GetTraceName() << ": Starting Listener Thread";
768  listen_thread_ = std::make_unique<boost::thread>(&TCPSocketTransfer::listen_, calculate_port_(), rcvbuf_);
769  }
770  else
771  {
772  listen_thread_refcount_++;
773  }
774 }
775 
776 void artdaq::TCPSocketTransfer::listen_(int port, size_t rcvbuf)
777 {
778  int listen_fd = -1;
779  while (listen_thread_refcount_ > 0)
780  {
781  TLOG(TLVL_TRACE) << "listen_: Listening/accepting new connections";
782  if (listen_fd == -1)
783  {
784  TLOG(TLVL_DEBUG) << "listen_: Opening listener";
785  listen_fd = TCP_listen_fd(port, rcvbuf);
786  }
787  if (listen_fd == -1)
788  {
789  TLOG(TLVL_DEBUG) << "listen_: Error creating listen_fd!";
790  break;
791  }
792 
793  int res;
794  timeval tv = { 2,0 }; // maybe increase of some global "debugging" flag set???
795  fd_set rfds;
796  FD_ZERO(&rfds);
797  FD_SET(listen_fd, &rfds);
798 
799  res = select(listen_fd + 1, &rfds, (fd_set *)0, (fd_set *)0, &tv);
800  if (res > 0)
801  {
802  int sts;
803  sockaddr_un un;
804  socklen_t arglen = sizeof(un);
805  int fd;
806  TLOG(TLVL_DEBUG) << "listen_: Calling accept";
807  fd = accept(listen_fd, (sockaddr *)&un, &arglen);
808  TLOG(TLVL_DEBUG) << "listen_: Done with accept";
809 
810  TLOG(TLVL_DEBUG) << "listen_: Reading connect message";
811  socklen_t lenlen = sizeof(tv);
812  /*sts=*/
813  setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, lenlen); // see man 7 socket.
814  MessHead mh;
815  uint64_t mark_us = TimeUtils::gettimeofday_us();
816  sts = read(fd, &mh, sizeof(mh));
817  uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us;
818  TLOG(TLVL_DEBUG) << "listen_: Read of connect message took " << delta_us << " microseconds.";
819  if (sts != sizeof(mh))
820  {
821  TLOG(TLVL_DEBUG) << "listen_: Wrong message header length received!";
822  close(fd);
823  continue;
824  }
825 
826  // check for "magic" and valid source_id(aka rank)
827  mh.source_id = ntohs(mh.source_id); // convert here as it is reference several times
828  if (ntohl(mh.conn_magic) != CONN_MAGIC || !(mh.message_type == MessHead::connect_v0)) // Allow for future connect message versions
829  {
830  TLOG(TLVL_DEBUG) << "listen_: Wrong magic bytes in header!";
831  close(fd);
832  continue;
833  }
834 
835  // now add (new) connection
836  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
837  connected_fds_[mh.source_id].insert(fd);
838 
839  TLOG(TLVL_INFO) << "listen_: New fd is " << fd << " for source rank " << mh.source_id;
840  }
841  else
842  {
843  TLOG(16) << "listen_: No connections in timeout interval!";
844  }
845  }
846 
847  TLOG(TLVL_INFO) << "listen_: Shutting down connection listener";
848  if (listen_fd != -1) close(listen_fd);
849  std::unique_lock<std::mutex> lk(connected_fd_mutex_);
850  auto it = connected_fds_.begin();
851  while (it != connected_fds_.end())
852  {
853  auto& fd_set = it->second;
854  auto rank_it = fd_set.begin();
855  while (rank_it != fd_set.end())
856  {
857  close(*rank_it);
858  rank_it = fd_set.erase(rank_it);
859  }
860  it = connected_fds_.erase(it);
861  }
862 
863 } // do_connect_
864 
// Invokes the DEFINE_ARTDAQ_TRANSFER macro for artdaq::TCPSocketTransfer.
// NOTE(review): presumably this emits the factory/registration code that lets
// the class be loaded as a transfer plugin — confirm against the macro definition.
865 DEFINE_ARTDAQ_TRANSFER(artdaq::TCPSocketTransfer)
bool isRunning() override
Determine whether the TransferInterface plugin is able to send/receive data.
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
int TCPConnect(char const *host_in, int dflt_port, long flags=0, int sndbufsiz=0)
Connect to a host on a given port.
Definition: TCPConnect.cc:215
uint32_t conn_magic
unsigned first is better for MessHead initializer: {0,0,my_node_idx_,CONN_MAGIC}
Definition: SRSockets.hh:38
std::string GetTraceName() const
Constructs a name suitable for TRACE messages.
This TransferInterface is a Receiver.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
TCPSocketTransfer(fhicl::ParameterSet const &ps, Role role)
TCPSocketTransfer Constructor.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
This TransferInterface is a Sender.
int32_t byte_count
Use CONN_MAGIC for connect_v0; for data_v0, the number of data bytes that follow (0 for zero-length data).
Definition: SRSockets.hh:39
Some error occurred, but no exception was thrown.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
int64_t source_id
Rank of the source.
Definition: SRSockets.hh:34
MessType message_type
Message Type.
Definition: SRSockets.hh:33
The send operation completed successfully.
This interface defines the functions used to transfer data between artdaq applications.
TransferInterface implementation plugin that sends data using TCP sockets.
This header is sent by the TCPSocket_transfer to allow for more efficient writev calls.
Definition: SRSockets.hh:15
void add_periodic(const char *desc, void *tag, std::function< void()> &function, uint64_t period_us, uint64_t start_us=0)
Add a periodic timeout to the Timeout container.
Definition: Timeout.cc:67
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.