9 #include <sys/socket.h>
11 #include <arpa/inet.h>
12 #include <sys/types.h>
21 #define TRACE_NAME "TCPSocketTransfer"
22 #include "artdaq/DAQdata/Globals.hh"
25 #include "artdaq/TransferPlugins/TCPSocketTransfer.hh"
28 #include "artdaq/TransferPlugins/detail/Timeout.hh"
29 #include "artdaq/TransferPlugins/detail/SRSockets.hh"
30 #include "artdaq-core/Data/Fragment.hh"
31 #include "artdaq-core/Utilities/TimeUtils.hh"
39 , state_(SocketState::Metadata)
42 , rcvbuf_(pset.get<size_t>(
"tcp_receive_buffer_size", 0))
43 , sndbuf_(max_fragment_size_words_ * sizeof(artdaq::RawDataType) * buffer_count_)
44 , stats_connect_stop_(false)
46 , timeoutMessageArmed_(true)
49 auto masterPortOffset = pset.get<
int>(
"offset_all_ports",0);
50 auto hosts = pset.get<std::vector<fhicl::ParameterSet>>(
"host_map");
51 for (
auto& ps : hosts)
55 info.hostname = ps.get<std::string>(
"host",
"localhost");
56 info.portOffset = ps.get<
int>(
"portOffset", 5500) + masterPortOffset;
58 hostMap_[rank] = info;
61 std::function<void()>
function = std::bind(&TCPSocketTransfer::reconnect_,
this);
67 TLOG_DEBUG(
uniqueLabel()) <<
"Listening for connections" << TLOG_ENDL;
69 TLOG_DEBUG(
uniqueLabel()) <<
"Done Listening" << TLOG_ENDL;
73 TLOG_DEBUG(
uniqueLabel()) <<
"Connecting to destination" << TLOG_ENDL;
75 TLOG_DEBUG(
uniqueLabel()) <<
"Done Connecting" << TLOG_ENDL;
77 TLOG_DEBUG(
uniqueLabel()) <<
"End of TCPSocketTransfer Constructor" << TLOG_ENDL;
80 artdaq::TCPSocketTransfer::~TCPSocketTransfer()
82 TLOG_DEBUG(uniqueLabel()) <<
"Shutting down TCPSocketTransfer" << TLOG_ENDL;
83 stats_connect_stop_ =
true;
84 stopstatscv_.notify_all();
85 stats_connect_thread_.join();
94 timeval tv = { 0,100000 };
95 socklen_t len =
sizeof(tv);
96 setsockopt(fd_, SOL_SOCKET, SO_SNDTIMEO, &tv, len);
97 write(fd_, &mh,
sizeof(mh));
101 TLOG_DEBUG(uniqueLabel()) <<
"End of TCPSocketTransfer Destructor" << TLOG_ENDL;
106 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: BEGIN" << TLOG_ENDL;
107 int ret_rank = RECV_TIMEOUT;
110 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Receive socket not connected, returning RECV_TIMEOUT" << TLOG_ENDL;
114 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::recvFragment timeout_usec=" << std::to_string(timeout_usec) << TLOG_ENDL;
118 uint64_t start_time_us = TimeUtils::gettimeofday_us();
121 pollfd_s.events = POLLIN | POLLERR;
126 if (timeout_usec == 0)
129 timeout_ms = (timeout_usec + 999) / 1000;
135 int num_fds_ready = poll(&pollfd_s, 1, timeout_ms);
136 if (num_fds_ready <= 0)
138 if (num_fds_ready == 0 && timeout_ms > 0)
140 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: No data on receive socket, returning RECV_TIMEOUT" << TLOG_ENDL;
146 if (!(pollfd_s.revents & (POLLIN | POLLERR)))
148 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Wrong event received from pollfd" << TLOG_ENDL;
152 if (state_ == SocketState::Metadata)
155 buff = &(mha[offset]);
156 byte_cnt =
sizeof(
MessHead) - offset;
161 buff =
reinterpret_cast<uint8_t*
>(&header) + offset;
166 sts = read(fd_, buff, byte_cnt);
169 TLOG_ARB(9, uniqueLabel()) <<
"TCPSocketTransfer::recvFragment state=" <<
static_cast<int>(state_) <<
" read=" << sts <<
" (errno=" << strerror(errno) <<
")" << TLOG_ENDL;
172 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Error on receive, closing socket" << TLOG_ENDL;
179 if (sts == target_bytes)
181 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Target read bytes reached. Changing state" << TLOG_ENDL;
183 if (state_ == SocketState::Metadata)
185 state_ = SocketState::Data;
192 state_ = SocketState::Metadata;
194 ret_rank = source_rank();
195 TLOG_ARB(9, uniqueLabel()) <<
"TCPSocketTransfer::recvFragment done sts=" << sts <<
" src=" << ret_rank << TLOG_ENDL;
196 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Done receiving fragment. Moving into output." << TLOG_ENDL;
204 if (!done && timeout_usec > 0)
207 size_t delta_us = TimeUtils::gettimeofday_us() - start_time_us;
208 if (delta_us > timeout_usec)
210 timeout_ms = ((timeout_usec - delta_us) + 999) / 1000;
214 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Returning " << ret_rank << TLOG_ENDL;
220 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: BEGIN" << TLOG_ENDL;
221 int ret_rank = RECV_TIMEOUT;
224 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Receive socket not connected, returning RECV_TIMEOUT" << TLOG_ENDL;
234 pollfd_s.events = POLLIN | POLLERR;
241 int num_fds_ready = poll(&pollfd_s, 1, -1);
242 if (num_fds_ready <= 0)
244 if (num_fds_ready == 0)
246 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: No data on receive socket, returning RECV_TIMEOUT" << TLOG_ENDL;
252 if (!(pollfd_s.revents & (POLLIN | POLLERR)))
254 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Wrong event received from pollfd" << TLOG_ENDL;
258 if (state_ == SocketState::Metadata)
261 buff = &(mha[offset]);
262 byte_cnt =
sizeof(
MessHead) - offset;
267 buff =
reinterpret_cast<uint8_t*
>(destination) + offset;
272 sts = read(fd_, buff, byte_cnt);
275 TLOG_ARB(9, uniqueLabel()) <<
"recvFragment state=" <<
static_cast<int>(state_) <<
" read=" << sts <<
" (errno=" << strerror(errno) <<
")" << TLOG_ENDL;
278 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Error on receive, closing socket" << TLOG_ENDL;
285 if (sts == target_bytes)
287 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Target read bytes reached. Changing state" << TLOG_ENDL;
289 if (state_ == SocketState::Metadata)
291 state_ = SocketState::Data;
298 state_ = SocketState::Metadata;
300 ret_rank = source_rank();
301 TLOG_ARB(9, uniqueLabel()) <<
"recvFragment done sts=" << sts <<
" src=" << ret_rank << TLOG_ENDL;
302 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Done receiving fragment. Moving into output." << TLOG_ENDL;
311 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::receiveFragment: Returning " << ret_rank << TLOG_ENDL;
319 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::sendFragment begin" << TLOG_ENDL;
320 artdaq::Fragment grab_ownership_frag = std::move(frag);
324 iovec iov = {
reinterpret_cast<void*
>(grab_ownership_frag.headerAddress()),
325 detail::RawFragmentHeader::num_words() *
sizeof(RawDataType) };
326 auto sts = sendData_(&iov, 1, send_timeout_usec);
327 while (sts != CopyStatus::kSuccess)
329 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::sendFragment: Timeout or Error sending fragment" << TLOG_ENDL;
330 sts = sendData_(&iov, 1, send_timeout_usec);
336 iov = {
reinterpret_cast<void*
>(grab_ownership_frag.headerAddress() + detail::RawFragmentHeader::num_words()),
337 grab_ownership_frag.sizeBytes() - detail::RawFragmentHeader::num_words() *
sizeof(RawDataType) };
338 sts = sendData_(&iov, 1, send_timeout_usec);
339 while (sts != CopyStatus::kSuccess)
341 TLOG_ARB(7, uniqueLabel()) <<
"TCPSocketTransfer::sendFragment: Timeout or Error sending fragment" << TLOG_ENDL;
342 sts = sendData_(&iov, 1, send_timeout_usec);
346 TRACE(7,
"TCPSocketTransfer::sendFragment returning kSuccess");
352 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::sendData_ Converting buf to iovec" << TLOG_ENDL;
353 iovec iov = { (
void*)buf, bytes };
354 return sendData_(&iov, 1, send_timeout_usec);
362 if (timeoutMessageArmed_)
364 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::sendData_: Send fd is not open. Returning kTimeout" << TLOG_ENDL;
365 timeoutMessageArmed_ =
false;
367 return CopyStatus::kTimeout;
369 timeoutMessageArmed_ =
true;
370 TLOG_ARB(12, uniqueLabel()) <<
"send_timeout_usec is " << std::to_string(send_timeout_usec) <<
", currently unused." << TLOG_ENDL;
373 uint32_t total_to_write_bytes = 0;
374 std::vector<iovec> iov_in(iovcnt + 1);
375 std::vector<iovec> iovv(iovcnt + 2);
377 for (ii = 0; ii < iovcnt; ++ii)
379 iov_in[ii + 1] = iov[ii];
380 total_to_write_bytes += iov[ii].iov_len;
383 MessHead mh = { 0,MessHead::data_v0,htons(source_rank()),htonl(total_to_write_bytes) };
384 iov_in[0].iov_base = &mh;
385 iov_in[0].iov_len =
sizeof(mh);
386 total_to_write_bytes +=
sizeof(mh);
389 ssize_t total_written_bytes = 0;
390 ssize_t per_write_max_bytes = (32 * 1024);
392 size_t in_iov_idx = 0;
393 size_t out_iov_idx = 0;
394 ssize_t this_write_bytes = 0;
401 (in_iov_idx + out_iov_idx) < iov_in.size() && this_write_bytes < per_write_max_bytes;
404 this_write_bytes += iov_in[in_iov_idx + out_iov_idx].iov_len;
405 iovv[out_iov_idx] = iov_in[in_iov_idx + out_iov_idx];
407 if (this_write_bytes > per_write_max_bytes)
409 iovv[out_iov_idx - 1].iov_len -= this_write_bytes - per_write_max_bytes;
410 this_write_bytes = per_write_max_bytes;
415 TLOG_ARB(7, uniqueLabel()) <<
"sendFragment b4 writev " << std::setw(7) << std::to_string(total_written_bytes) <<
" total_written_bytes fd=" << fd_ <<
" in_idx=" << std::to_string(in_iov_idx)
416 <<
" iovcnt=" << std::to_string(out_iov_idx) <<
" 1st.len=" << std::to_string(iovv[0].iov_len) << TLOG_ENDL;
418 sts = writev(fd_, &(iovv[0]), out_iov_idx);
423 if (errno == EAGAIN )
425 TLOG_ARB(2, uniqueLabel()) <<
"sendFragment EWOULDBLOCK" << TLOG_ENDL;
426 fcntl(fd_, F_SETFL, 0);
431 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::sendFragment_: WRITE ERROR: " << strerror(errno) << TLOG_ENDL;
437 else if (sts != this_write_bytes)
440 TLOG_ARB(4, uniqueLabel()) <<
"sendFragment writev sts(" << std::to_string(sts) <<
")!=requested_send_bytes(" << std::to_string(this_write_bytes) <<
")" << TLOG_ENDL;
441 total_written_bytes += sts;
443 for (ii = 0; (size_t)sts >= iovv[ii].iov_len; ++ii)
444 sts -= iovv[ii].iov_len;
446 iovv[ii].iov_len -= sts;
447 iovv[ii].iov_base = (uint8_t*)(iovv[ii].iov_base) + sts;
452 iovv[out_iov_idx] = iovv[ii];
454 this_write_bytes = iovv[out_iov_idx].iov_len;
459 unsigned long additional = ((
unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
460 - ((
unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
463 iovv[out_iov_idx].iov_len += additional;
464 this_write_bytes += additional;
465 if (this_write_bytes > per_write_max_bytes)
467 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
468 this_write_bytes = per_write_max_bytes;
472 TLOG_ARB(4, uniqueLabel()) <<
"sendFragment writev sts!=: this_write_bytes=" << std::to_string(this_write_bytes)
473 <<
" out_iov_idx=" << std::to_string(out_iov_idx)
474 <<
" additional=" << std::to_string(additional)
475 <<
" ii=" << ii << TLOG_ENDL;
479 TLOG_ARB(4, uniqueLabel()) <<
"sendFragment writev sts(" << std::to_string(sts) <<
")==requested_send_bytes(" << std::to_string(this_write_bytes) <<
")" << TLOG_ENDL;
480 total_written_bytes += sts;
482 iovv[out_iov_idx].iov_base = (uint8_t*)(iovv[out_iov_idx].iov_base) + iovv[out_iov_idx].iov_len;
483 iovv[out_iov_idx].iov_len = 0;
484 in_iov_idx += out_iov_idx;
485 this_write_bytes = 0;
487 unsigned long additional = ((
unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
488 - ((
unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
491 iovv[out_iov_idx].iov_len += additional;
492 this_write_bytes += additional;
493 if (this_write_bytes > per_write_max_bytes)
495 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
496 this_write_bytes = per_write_max_bytes;
498 if (out_iov_idx != 0)
499 iovv[0] = iovv[out_iov_idx];
508 }
while (total_written_bytes < total_to_write_bytes);
509 if (total_written_bytes > total_to_write_bytes)
510 TLOG_ARB(0, uniqueLabel()) <<
"sendFragment program error: too many bytes transferred" << TLOG_ENDL;
515 fcntl(fd_, F_SETFL, 0);
517 sts = total_written_bytes -
sizeof(
MessHead);
519 TLOG_ARB(10, uniqueLabel()) <<
"sendFragment sts=" << std::to_string(sts) << TLOG_ENDL;
525 void artdaq::TCPSocketTransfer::stats_connect_()
528 while (!stats_connect_stop_)
532 std::function<void()>
function;
535 int msdly = tmo_.get_next_timeout_msdly();
540 std::unique_lock<std::mutex> lck(stopstatscvm_);
541 sts = stopstatscv_.wait_until(lck
542 , std::chrono::system_clock::now()
543 + std::chrono::milliseconds(msdly));
544 TLOG_ARB(5, uniqueLabel()) <<
"thread1 after wait_until(msdly=" << msdly <<
") - sts=" <<
static_cast<int>(sts) << TLOG_ENDL;
546 if (sts == std::cv_status::no_timeout)
549 auto sts = tmo_.get_next_expired_timeout(desc, &tag,
function, &ts_us);
551 while (sts != -1 && desc !=
"")
553 if (
function != NULL)
556 sts = tmo_.get_next_expired_timeout(desc, &tag,
function, &ts_us);
561 void artdaq::TCPSocketTransfer::connect_()
563 TLOG_DEBUG(uniqueLabel()) <<
"Connecting sender socket" << TLOG_ENDL;
564 int sndbuf_bytes =
static_cast<int>(sndbuf_);
565 fd_ =
TCPConnect(hostMap_[destination_rank()].hostname.c_str()
571 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::connect_ " + hostMap_[destination_rank()].hostname +
":" << calculate_port_() <<
" fd_=" << fd_ << TLOG_ENDL;
575 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::connect_: Writing connect message" << TLOG_ENDL;
576 MessHead mh = { 0,MessHead::connect_v0,htons(source_rank()),htonl(CONN_MAGIC) };
577 ssize_t sts = write(fd_, &mh,
sizeof(mh));
580 TLOG_ERROR(uniqueLabel()) <<
"TCPSocketTransfer::connect_: Error writing connect message!" << TLOG_ENDL;
588 TLOG_INFO(uniqueLabel()) <<
"TCPSocketTransfer::connect_: Successfully connected" << TLOG_ENDL;
595 void artdaq::TCPSocketTransfer::reconnect_()
597 TLOG_ARB(5, uniqueLabel()) <<
"check/reconnect" << TLOG_ENDL;
602 void artdaq::TCPSocketTransfer::listen_()
604 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Listening/accepting new connections" << TLOG_ENDL;
605 if (listen_fd_ == -1)
607 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Opening listener" << TLOG_ENDL;
610 if (listen_fd_ == -1)
612 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Error creating listen_fd_!" << TLOG_ENDL;
617 timeval tv = { 2,0 };
620 FD_SET(listen_fd_, &rfds);
622 res = select(listen_fd_ + 1, &rfds, (fd_set *)0, (fd_set *)0, &tv);
627 socklen_t arglen =
sizeof(un);
629 TLOG_DEBUG(uniqueLabel()) <<
"Calling accept" << TLOG_ENDL;
630 fd = accept(listen_fd_, (sockaddr *)&un, &arglen);
631 TLOG_DEBUG(uniqueLabel()) <<
"Done with accept" << TLOG_ENDL;
633 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Reading connect message" << TLOG_ENDL;
634 socklen_t lenlen =
sizeof(tv);
636 setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, lenlen);
638 uint64_t mark_us = TimeUtils::gettimeofday_us();
639 sts = read(fd, &mh,
sizeof(mh));
640 uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us;
641 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Read of connect message took " << delta_us <<
" microseconds." << TLOG_ENDL;
642 TLOG_ARB(10, uniqueLabel()) <<
"do_connect read of connect msg (after accept) took " << std::to_string(delta_us) <<
" microseconds" << TLOG_ENDL;
643 if (sts !=
sizeof(mh))
645 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Wrong message header length received!" << TLOG_ENDL;
646 TLOG_ARB(0, uniqueLabel()) <<
"do_connect_ problem with connect msg sts(" << sts <<
")!=sizeof(mh)("<<std::to_string(
sizeof(mh)) <<
")" << TLOG_ENDL;
655 TLOG_DEBUG(uniqueLabel()) <<
"TCPSocketTransfer::listen_: Wrong magic bytes in header!" << TLOG_ENDL;
667 TLOG_INFO(uniqueLabel()) <<
"TCPSocketTransfer::listen_: New fd is " << fd_ << TLOG_ENDL;
669 TLOG_ARB(3, uniqueLabel()) <<
"do_connect_ connection from sender_rank=" << std::to_string(mh.
source_id) << TLOG_ENDL;
673 TLOG_ARB(10, uniqueLabel()) <<
"TCPSocketTransfer::do_connect_: No connections in timeout interval!" << TLOG_ENDL;
virtual int source_rank() const
Get the source rank for this TransferInterface instance.
int TCPConnect(char const *host_in, int dflt_port, long flags=0, int sndbufsiz=0)
Connect to a host on a given port.
uint32_t conn_magic
unsigned first is better for MessHead initializer: {0,0,my_node_idx_,CONN_MAGIC}
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
This TransferInterface is a Receiver.
int receiveFragmentData(RawDataType *destination, size_t wordCount) override
Receive the body of a Fragment to the given destination pointer.
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer size.
TCPSocketTransfer(fhicl::ParameterSet const &ps, Role role)
TCPSocketTransfer Constructor.
int receiveFragmentHeader(detail::RawFragmentHeader &header, size_t receiveTimeout) override
Receive a Fragment Header from the transport mechanism.
This TransferInterface is a Sender.
int32_t byte_count
Use CONN_MAGIC for connect_v0; for data_v0, the number of data bytes that follow (may be 0 for zero-length data)
Some error occurred, but no exception was thrown.
Role
Used to determine if a TransferInterface is a Sender or Receiver.
int64_t source_id
Rank of the source.
The send operation completed successfully.
std::string uniqueLabel() const
Get the unique label of this TransferInterface instance.
This interface defines the functions used to transfer data between artdaq applications.
TransferInterface implementation plugin that sends data using TCP sockets.
This header is sent by the TCPSocket_transfer to allow for more efficient writev calls.
void add_periodic(const char *desc, void *tag, std::function< void()> &function, uint64_t period_us, uint64_t start_us=0)
Add a periodic timeout to the Timeout container.
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.