00001
00002
00003
00004
00005
00006
00007
00008 #include <stdlib.h>
00009 #include <sys/socket.h>
00010 #include <sys/un.h>
00011 #include <arpa/inet.h>
00012 #include <sys/types.h>
00013 #include <poll.h>
00014
00015
00016 #include <string>
00017 #include <fstream>
00018 #include <stdexcept>
00019
00020
00021 #define TRACE_NAME "TCPSocketTransfer"
00022 #include "artdaq/DAQdata/Globals.hh"
00023
00024
00025 #include "artdaq/TransferPlugins/TCPSocketTransfer.hh"
00026 #include "artdaq/DAQdata/TCP_listen_fd.hh"
00027 #include "artdaq/DAQdata/TCPConnect.hh"
00028 #include "artdaq/TransferPlugins/detail/Timeout.hh"
00029 #include "artdaq/TransferPlugins/detail/SRSockets.hh"
00030 #include "artdaq-core/Data/Fragment.hh"
00031 #include "artdaq-core/Utilities/TimeUtils.hh"
00032 #include <iomanip>
00033
00034 std::atomic<int> artdaq::TCPSocketTransfer::listen_thread_refcount_(0);
00035 std::unique_ptr<boost::thread> artdaq::TCPSocketTransfer::listen_thread_ = nullptr;
00036 std::map<int, std::set<int>> artdaq::TCPSocketTransfer::connected_fds_ = std::map<int, std::set<int>>();
00037 std::mutex artdaq::TCPSocketTransfer::listen_thread_mutex_;
00038
00039 artdaq::TCPSocketTransfer::
00040 TCPSocketTransfer(fhicl::ParameterSet const& pset, TransferInterface::Role role)
00041 : TransferInterface(pset, role)
00042 , send_fd_(-1)
00043 , active_receive_fd_(-1)
00044 , last_active_receive_fd_(-1)
00045 , rcvbuf_(pset.get<size_t>("tcp_receive_buffer_size", 0))
00046 , sndbuf_(max_fragment_size_words_ * sizeof(artdaq::RawDataType) * buffer_count_)
00047 , send_retry_timeout_us_(pset.get<size_t>("send_retry_timeout_us", 1000000))
00048 , stats_connect_stop_(false)
00049 , stats_connect_thread_(std::bind(&TCPSocketTransfer::stats_connect_, this))
00050 , timeoutMessageArmed_(true)
00051 , not_connected_count_(0)
00052 , receive_err_threshold_(pset.get<size_t>("receive_socket_disconnected_max_count", 1000))
00053 , receive_err_wait_us_(pset.get<size_t>("receive_socket_disconnected_wait_us", 10000))
00054 {
00055 TLOG(TLVL_DEBUG) << GetTraceName() << " Constructor: pset=" << pset.to_string() << ", role=" << (role == TransferInterface::Role::kReceive ? "kReceive" : "kSend");
00056 auto masterPortOffset = pset.get<int>("offset_all_ports", 0);
00057 hostMap_ = MakeHostMap(pset, masterPortOffset);
00058
00059 std::function<void()> function = std::bind(&TCPSocketTransfer::reconnect_, this);
00060 tmo_.add_periodic("reconnect", NULL, function, 200);
00061
00062 if (role == TransferInterface::Role::kReceive)
00063 {
00064
00065 TLOG(TLVL_DEBUG) << GetTraceName() << ": Listening for connections";
00066 start_listen_thread_();
00067 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Listening";
00068 }
00069 else
00070 {
00071 TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting to destination";
00072 connect_();
00073 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Connecting";
00074 }
00075 TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Constructor";
00076 }
00077
00078 artdaq::TCPSocketTransfer::~TCPSocketTransfer() noexcept
00079 {
00080 TLOG(TLVL_DEBUG) << GetTraceName() << ": Shutting down TCPSocketTransfer";
00081 stats_connect_stop_ = true;
00082 stopstatscv_.notify_all();
00083 stats_connect_thread_.join();
00084
00085 if (role() == TransferInterface::Role::kSend)
00086 {
00087
00088 MessHead mh = { 0,MessHead::stop_v0,htons(TransferInterface::source_rank()),{0} };
00089 if (send_fd_ != -1)
00090 {
00091
00092 timeval tv = { 0,100000 };
00093 socklen_t len = sizeof(tv);
00094 setsockopt(send_fd_, SOL_SOCKET, SO_SNDTIMEO, &tv, len);
00095 write(send_fd_, &mh, sizeof(mh));
00096 }
00097 close(send_fd_);
00098 }
00099 else
00100 {
00101 auto it = connected_fds_[source_rank()].begin();
00102 while (it != connected_fds_[source_rank()].end())
00103 {
00104 close(*it);
00105 it = connected_fds_[source_rank()].erase(it);
00106 }
00107
00108 std::unique_lock<std::mutex> lk(listen_thread_mutex_);
00109 listen_thread_refcount_--;
00110 if (listen_thread_refcount_ == 0 && listen_thread_ && listen_thread_->joinable())
00111 {
00112 listen_thread_->join();
00113 }
00114 }
00115 TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Destructor";
00116 }
00117
00118 int artdaq::TCPSocketTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t timeout_usec)
00119 {
00120 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: BEGIN";
00121 int ret_rank = RECV_TIMEOUT;
00122
00123 if (connected_fds_[source_rank()].size() == 0)
00124 {
00125 if (++not_connected_count_ > receive_err_threshold_) { return DATA_END; }
00126 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Receive socket not connected, returning RECV_TIMEOUT";
00127 usleep(receive_err_wait_us_);
00128 return RECV_TIMEOUT;
00129 }
00130 not_connected_count_ = 0;
00131
00132 TLOG(5) << GetTraceName() << ": receiveFragmentHeader timeout_usec=" << std::to_string(timeout_usec);
00133
00134 size_t byte_cnt = 0;
00135 int sts;
00136 int offset = 0;
00137 SocketState state = SocketState::Metadata;
00138 int target_bytes = sizeof(MessHead);
00139 uint64_t start_time_us = TimeUtils::gettimeofday_us();
00140
00141
00142
00143
00144
00145
00146
00147
00148 uint8_t* buff;
00149
00150 int timeout_ms;
00151 if (timeout_usec == 0)
00152 timeout_ms = 0;
00153 else
00154 timeout_ms = (timeout_usec + 999) / 1000;
00155
00156 bool done = false;
00157 while (!done && connected_fds_[source_rank()].size() > 0)
00158 {
00159 if (active_receive_fd_ == -1)
00160 {
00161 size_t fd_count = connected_fds_[source_rank()].size();
00162 auto iter = connected_fds_[source_rank()].begin();
00163 std::vector<pollfd> pollfds(fd_count);
00164 for (size_t ii = 0; ii < fd_count; ++ii)
00165 {
00166 pollfds[ii].events = POLLIN | POLLERR;
00167 pollfds[ii].fd = *iter;
00168 ++iter;
00169 }
00170
00171
00172 int num_fds_ready = poll(&pollfds[0], fd_count, timeout_ms);
00173 if (num_fds_ready <= 0)
00174 {
00175 if (num_fds_ready == 0 && timeout_ms > 0)
00176 {
00177 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: No data on receive socket, returning RECV_TIMEOUT";
00178 return RECV_TIMEOUT;
00179 }
00180 break;
00181 }
00182
00183 size_t index = 0;
00184 if (last_active_receive_fd_ != -1)
00185 {
00186 for (auto& pollfd : pollfds)
00187 {
00188 index++;
00189 if (pollfd.fd == last_active_receive_fd_)
00190 {
00191 break;
00192 }
00193 }
00194 }
00195
00196 int active_index = -1;
00197 short anomolous_events = 0;
00198 for (size_t ii = index; ii < index + pollfds.size(); ++ii)
00199 {
00200 if (pollfds[index % pollfds.size()].revents & (POLLIN | POLLPRI | POLLHUP | POLLERR))
00201 {
00202 active_index = index % pollfds.size();
00203 active_receive_fd_ = pollfds[active_index].fd;
00204 break;
00205 }
00206 else if (pollfds[index % pollfds.size()].revents & (POLLNVAL))
00207 {
00208 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: FD is closed, most likely because the peer went away. Removing from fd list.";
00209 close(pollfds[active_index].fd);
00210 connected_fds_[source_rank()].erase(pollfds[active_index].fd);
00211 continue;
00212 }
00213 else if (pollfds[index % pollfds.size()].revents)
00214 {
00215 anomolous_events |= pollfds[index % pollfds.size()].revents;
00216 }
00217 }
00218
00219 if (active_index == -1)
00220 {
00221 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Wrong event received from a pollfd. Mask: " << static_cast<int>(anomolous_events);
00222 active_receive_fd_ = -1;
00223 continue;
00224 }
00225
00226 if (!done && timeout_usec > 0)
00227 {
00228
00229 size_t delta_us = TimeUtils::gettimeofday_us() - start_time_us;
00230 if (delta_us > timeout_usec)
00231 {
00232 return RECV_TIMEOUT;
00233 }
00234 timeout_ms = ((timeout_usec - delta_us) + 999) / 1000;
00235 }
00236 }
00237
00238 if (state == SocketState::Metadata)
00239 {
00240
00241 buff = &(mha[offset]);
00242 byte_cnt = sizeof(MessHead) - offset;
00243 }
00244 else
00245 {
00246
00247 buff = reinterpret_cast<uint8_t*>(&header) + offset;
00248 byte_cnt = mh.byte_count - offset;
00249 }
00250
00251 if (byte_cnt > 0)
00252 {
00253 TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Reading " << byte_cnt << " bytes from socket";
00254 sts = read(active_receive_fd_, buff, byte_cnt);
00255 TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Done with read";
00256 }
00257
00258 TLOG(7) << GetTraceName() << ": receiveFragmentHeader state=" << static_cast<int>(state) << " read=" << sts;
00259 if (sts < 0)
00260 {
00261 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: Error on receive, closing socket " << " (errno=" << errno << ": " << strerror(errno) << ")";
00262 close(active_receive_fd_);
00263 connected_fds_[source_rank()].erase(active_receive_fd_);
00264 active_receive_fd_ = -1;
00265 }
00266 else
00267 {
00268
00269 sts = offset += sts;
00270 if (sts >= target_bytes)
00271 {
00272 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Target read bytes reached. Changing state";
00273 offset = 0;
00274 if (state == SocketState::Metadata)
00275 {
00276 state = SocketState::Data;
00277 mh.byte_count = ntohl(mh.byte_count);
00278 mh.source_id = ntohs(mh.source_id);
00279 target_bytes = mh.byte_count;
00280
00281 if (mh.message_type == MessHead::stop_v0)
00282 {
00283 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Stop Message received. Closing socket";
00284 close(active_receive_fd_);
00285 connected_fds_[source_rank()].erase(active_receive_fd_);
00286 active_receive_fd_ = -1;
00287 }
00288 }
00289 else
00290 {
00291 ret_rank = source_rank();
00292 TLOG(8) << GetTraceName() << ": receiveFragmentHeader done sts=" << sts << " src=" << ret_rank;
00293 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Done receiving fragment header. Moving into output.";
00294
00295 done = true;
00296 break;
00297 }
00298 }
00299 }
00300
00301 }
00302
00303 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: Returning " << ret_rank;
00304 return ret_rank;
00305 }
00306
00307 int artdaq::TCPSocketTransfer::receiveFragmentData(RawDataType* destination, size_t)
00308 {
00309 TLOG(9) << GetTraceName() << ": receiveFragmentData: BEGIN";
00310 int ret_rank = RECV_TIMEOUT;
00311 if (active_receive_fd_ == -1)
00312 {
00313 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Receive socket not connected, returning RECV_TIMEOUT";
00314 return RECV_TIMEOUT;
00315 }
00316
00317
00318 uint8_t* buff;
00319 size_t byte_cnt = 0;
00320 int sts;
00321 int offset = 0;
00322 SocketState state = SocketState::Metadata;
00323 int target_bytes = sizeof(MessHead);
00324
00325 pollfd pollfd_s;
00326 pollfd_s.events = POLLIN | POLLPRI | POLLERR;
00327 pollfd_s.fd = active_receive_fd_;
00328
00329 bool done = false;
00330 while (!done)
00331 {
00332 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Polling fd to see if there's data";
00333 int num_fds_ready = poll(&pollfd_s, 1, 1000);
00334 if (num_fds_ready <= 0)
00335 {
00336 if (num_fds_ready == 0)
00337 {
00338 TLOG(9) << GetTraceName() << ": receiveFragmentData: No data on receive socket, returning RECV_TIMEOUT";
00339 active_receive_fd_ = -1;
00340 return RECV_TIMEOUT;
00341 }
00342
00343 TLOG(TLVL_ERROR) << "Error in poll: errno=" << errno;
00344 active_receive_fd_ = -1;
00345 break;
00346 }
00347
00348 if (pollfd_s.revents & (POLLNVAL))
00349 {
00350 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: FD is closed, most likely because the peer went away. Removing from fd list.";
00351 close(active_receive_fd_);
00352 connected_fds_[source_rank()].erase(active_receive_fd_);
00353 active_receive_fd_ = -1;
00354 break;
00355 }
00356 else if (!(pollfd_s.revents & (POLLIN | POLLPRI | POLLERR)))
00357 {
00358 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Wrong event received from pollfd: " << pollfd_s.revents;
00359 close(active_receive_fd_);
00360 connected_fds_[source_rank()].erase(active_receive_fd_);
00361 continue;
00362 }
00363
00364 if (state == SocketState::Metadata)
00365 {
00366
00367 buff = &(mha[offset]);
00368 byte_cnt = sizeof(MessHead) - offset;
00369 }
00370 else
00371 {
00372
00373 buff = reinterpret_cast<uint8_t*>(destination) + offset;
00374 byte_cnt = mh.byte_count - offset;
00375 }
00376
00377
00378 sts = read(active_receive_fd_, buff, byte_cnt);
00379
00380
00381 TLOG(10) << GetTraceName() << ": recvFragment state=" << static_cast<int>(state) << " read=" << sts;
00382 if (sts < 0)
00383 {
00384 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Error on receive, closing socket"
00385 << " (errno=" << errno << ": " << strerror(errno) << ")";
00386 close(active_receive_fd_);
00387 connected_fds_[source_rank()].erase(active_receive_fd_);
00388 active_receive_fd_ = -1;
00389 }
00390 else
00391 {
00392
00393 sts = offset += sts;
00394 if (sts >= target_bytes)
00395 {
00396 TLOG(9) << GetTraceName() << ": receiveFragmentData: Target read bytes reached. Changing state";
00397 offset = 0;
00398 if (state == SocketState::Metadata)
00399 {
00400 state = SocketState::Data;
00401 mh.byte_count = ntohl(mh.byte_count);
00402 mh.source_id = ntohs(mh.source_id);
00403 target_bytes = mh.byte_count;
00404 }
00405 else
00406 {
00407 ret_rank = source_rank();
00408 TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
00409 TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
00410
00411 done = true;
00412 break;
00413 }
00414 }
00415 }
00416
00417
00418 if (target_bytes == 0 && state == SocketState::Data)
00419 {
00420 ret_rank = source_rank();
00421 TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
00422 TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
00423
00424 done = true;
00425 }
00426
00427 }
00428
00429 last_active_receive_fd_ = active_receive_fd_;
00430 active_receive_fd_ = -1;
00431
00432 TLOG(9) << GetTraceName() << ": receiveFragmentData: Returning " << ret_rank;
00433 return ret_rank;
00434 }
00435
00436
00437
00438 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendFragment_(Fragment&& frag, size_t send_timeout_usec)
00439 {
00440 TLOG(12) << GetTraceName() << ": sendFragment begin";
00441 artdaq::Fragment grab_ownership_frag = std::move(frag);
00442
00443
00444
00445 iovec iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress()),
00446 detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
00447
00448 auto sts = sendData_(&iov, 1, send_retry_timeout_us_);
00449 auto start_time = std::chrono::steady_clock::now();
00450
00451 while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
00452 {
00453 TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
00454 sts = sendData_(&iov, 1, send_retry_timeout_us_);
00455 usleep(1000);
00456 }
00457 if (sts != CopyStatus::kSuccess) return sts;
00458
00459
00460
00461 iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress() + detail::RawFragmentHeader::num_words()),
00462 grab_ownership_frag.sizeBytes() - detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
00463 sts = sendData_(&iov, 1, send_retry_timeout_us_);
00464 start_time = std::chrono::steady_clock::now();
00465 while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
00466 {
00467 TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
00468 sts = sendData_(&iov, 1, send_retry_timeout_us_);
00469 usleep(1000);
00470 }
00471
00472 TLOG(12) << GetTraceName() << ": sendFragment returning kSuccess";
00473 return sts;
00474 }
00475
00476 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const void* buf, size_t bytes, size_t send_timeout_usec)
00477 {
00478 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_ Converting buf to iovec";
00479 iovec iov = { (void*)buf, bytes };
00480 return sendData_(&iov, 1, send_timeout_usec);
00481 }
00482
00483 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const struct iovec* iov, int iovcnt, size_t send_timeout_usec)
00484 {
00485
00486 if (send_fd_ == -1)
00487 {
00488 if (timeoutMessageArmed_)
00489 {
00490 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Send fd is not open. Returning kTimeout";
00491 timeoutMessageArmed_ = false;
00492 }
00493 return CopyStatus::kTimeout;
00494 }
00495 timeoutMessageArmed_ = true;
00496 TLOG(14) << GetTraceName() << ": send_timeout_usec is " << std::to_string(send_timeout_usec) << ", currently unused.";
00497
00498
00499 uint32_t total_to_write_bytes = 0;
00500 std::vector<iovec> iov_in(iovcnt + 1);
00501 std::vector<iovec> iovv(iovcnt + 2);
00502 int ii;
00503 for (ii = 0; ii < iovcnt; ++ii)
00504 {
00505 iov_in[ii + 1] = iov[ii];
00506 total_to_write_bytes += iov[ii].iov_len;
00507 }
00508
00509 MessHead mh = { 0,MessHead::data_v0,htons(source_rank()),{htonl(total_to_write_bytes)} };
00510 iov_in[0].iov_base = &mh;
00511 iov_in[0].iov_len = sizeof(mh);
00512 total_to_write_bytes += sizeof(mh);
00513
00514 ssize_t sts = 0;
00515 ssize_t total_written_bytes = 0;
00516 ssize_t per_write_max_bytes = (32 * 1024);
00517
00518 size_t in_iov_idx = 0;
00519 size_t out_iov_idx = 0;
00520 ssize_t this_write_bytes = 0;
00521
00522 do
00523 {
00524
00525
00526 for (;
00527 (in_iov_idx + out_iov_idx) < iov_in.size() && this_write_bytes < per_write_max_bytes;
00528 ++out_iov_idx)
00529 {
00530 this_write_bytes += iov_in[in_iov_idx + out_iov_idx].iov_len;
00531 iovv[out_iov_idx] = iov_in[in_iov_idx + out_iov_idx];
00532 }
00533 if (this_write_bytes > per_write_max_bytes)
00534 {
00535 iovv[out_iov_idx - 1].iov_len -= this_write_bytes - per_write_max_bytes;
00536 this_write_bytes = per_write_max_bytes;
00537 }
00538
00539
00540 do_again:
00541 TLOG(14) << GetTraceName() << ": sendFragment b4 writev " << std::setw(7) << std::to_string(total_written_bytes) << " total_written_bytes send_fd_=" << send_fd_ << " in_idx=" << std::to_string(in_iov_idx)
00542 << " iovcnt=" << std::to_string(out_iov_idx) << " 1st.len=" << std::to_string(iovv[0].iov_len);
00543
00544 sts = writev(send_fd_, &(iovv[0]), out_iov_idx);
00545
00546
00547 if (sts == -1)
00548 {
00549 if (errno == EAGAIN )
00550 {
00551 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment EWOULDBLOCK";
00552 fcntl(send_fd_, F_SETFL, 0);
00553 blocking = true;
00554
00555 goto do_again;
00556 }
00557 TLOG(TLVL_WARNING) << GetTraceName() << ": sendFragment_: WRITE ERROR: " << strerror(errno);
00558 connect_state = 0;
00559 close(send_fd_);
00560 send_fd_ = -1;
00561 return TransferInterface::CopyStatus::kErrorNotRequiringException;
00562 }
00563 else if (sts != this_write_bytes)
00564 {
00565
00566 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment writev sts(" << std::to_string(sts) << ")!=requested_send_bytes(" << std::to_string(this_write_bytes) << ")";
00567 total_written_bytes += sts;
00568
00569 for (ii = 0; (size_t)sts >= iovv[ii].iov_len; ++ii)
00570 sts -= iovv[ii].iov_len;
00571 in_iov_idx += ii;
00572 iovv[ii].iov_len -= sts;
00573 iovv[ii].iov_base = (uint8_t*)(iovv[ii].iov_base) + sts;
00574
00575
00576 out_iov_idx = 0;
00577 if (ii != 0)
00578 iovv[out_iov_idx] = iovv[ii];
00579
00580 this_write_bytes = iovv[out_iov_idx].iov_len;
00581
00582
00583
00584
00585 unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
00586 - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
00587 if (additional)
00588 {
00589 iovv[out_iov_idx].iov_len += additional;
00590 this_write_bytes += additional;
00591 if (this_write_bytes > per_write_max_bytes)
00592 {
00593 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
00594 this_write_bytes = per_write_max_bytes;
00595 }
00596 }
00597 ++out_iov_idx;
00598 TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts!=: this_write_bytes=" << std::to_string(this_write_bytes)
00599 << " out_iov_idx=" << std::to_string(out_iov_idx)
00600 << " additional=" << std::to_string(additional)
00601 << " ii=" << ii;
00602 }
00603 else
00604 {
00605 TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts(" << std::to_string(sts) << ")==requested_send_bytes(" << std::to_string(this_write_bytes) << ")";
00606 total_written_bytes += sts;
00607 --out_iov_idx;
00608 iovv[out_iov_idx].iov_base = (uint8_t*)(iovv[out_iov_idx].iov_base) + iovv[out_iov_idx].iov_len;
00609 iovv[out_iov_idx].iov_len = 0;
00610 in_iov_idx += out_iov_idx;
00611 this_write_bytes = 0;
00612
00613 unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
00614 - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
00615 if (additional)
00616 {
00617 iovv[out_iov_idx].iov_len += additional;
00618 this_write_bytes += additional;
00619 if (this_write_bytes > per_write_max_bytes)
00620 {
00621 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
00622 this_write_bytes = per_write_max_bytes;
00623 }
00624 if (out_iov_idx != 0)
00625 iovv[0] = iovv[out_iov_idx];
00626 out_iov_idx = 1;
00627 }
00628 else
00629 {
00630 ++in_iov_idx;
00631 out_iov_idx = 0;
00632 }
00633 }
00634 } while (total_written_bytes < total_to_write_bytes);
00635 if (total_written_bytes > total_to_write_bytes)
00636 TLOG(TLVL_ERROR) << GetTraceName() << ": sendFragment program error: too many bytes transferred";
00637
00638 if (blocking)
00639 {
00640 blocking = false;
00641 fcntl(send_fd_, F_SETFL, 0);
00642 }
00643 sts = total_written_bytes - sizeof(MessHead);
00644
00645 TLOG(14) << GetTraceName() << ": sendFragment sts=" << std::to_string(sts);
00646 return TransferInterface::CopyStatus::kSuccess;
00647 }
00648
00649
00650
00651 void artdaq::TCPSocketTransfer::stats_connect_()
00652 {
00653 std::cv_status sts;
00654 while (!stats_connect_stop_)
00655 {
00656 std::string desc;
00657 void* tag;
00658 std::function<void()> function;
00659 uint64_t ts_us;
00660
00661 int msdly = tmo_.get_next_timeout_msdly();
00662
00663 if (msdly <= 0)
00664 msdly = 2000;
00665
00666 std::unique_lock<std::mutex> lck(stopstatscvm_);
00667 sts = stopstatscv_.wait_until(lck
00668 , std::chrono::system_clock::now()
00669 + std::chrono::milliseconds(msdly));
00670 TLOG(15) << GetTraceName() << ": thread1 after wait_until(msdly=" << msdly << ") - sts=" << static_cast<int>(sts);
00671
00672 if (sts == std::cv_status::no_timeout)
00673 break;
00674
00675 auto sts = tmo_.get_next_expired_timeout(desc, &tag, function, &ts_us);
00676
00677 while (sts != -1 && desc != "")
00678 {
00679 if (function != NULL)
00680 function();
00681
00682 sts = tmo_.get_next_expired_timeout(desc, &tag, function, &ts_us);
00683 }
00684 }
00685 }
00686
00687 void artdaq::TCPSocketTransfer::connect_()
00688 {
00689 TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting sender socket";
00690 int sndbuf_bytes = static_cast<int>(sndbuf_);
00691 send_fd_ = TCPConnect(hostMap_[destination_rank()].hostname.c_str()
00692 , calculate_port_()
00693 , O_NONBLOCK
00694 , sndbuf_bytes);
00695 connect_state = 0;
00696 blocking = 0;
00697 TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_ " + hostMap_[destination_rank()].hostname + ":" << calculate_port_() << " send_fd_=" << send_fd_;
00698 if (send_fd_ != -1)
00699 {
00700
00701 TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_: Writing connect message";
00702 MessHead mh = { 0,MessHead::connect_v0,htons(source_rank()),{htonl(CONN_MAGIC)} };
00703 ssize_t sts = write(send_fd_, &mh, sizeof(mh));
00704 if (sts == -1)
00705 {
00706 TLOG(TLVL_ERROR) << GetTraceName() << ": connect_: Error writing connect message!";
00707
00708 connect_state = 0;
00709 close(send_fd_);
00710 send_fd_ = -1;
00711 }
00712 else
00713 {
00714 TLOG(TLVL_INFO) << GetTraceName() << ": connect_: Successfully connected";
00715
00716 connect_state = 1;
00717 }
00718 }
00719 }
00720
00721 void artdaq::TCPSocketTransfer::reconnect_()
00722 {
00723 TLOG(TLVL_TRACE) << GetTraceName() << ": check/reconnect";
00724 if (send_fd_ == -1 && role() == TransferInterface::Role::kSend) return connect_();
00725 }
00726
00727 void artdaq::TCPSocketTransfer::start_listen_thread_()
00728 {
00729 std::unique_lock<std::mutex> start_lock(listen_thread_mutex_);
00730 if (listen_thread_refcount_ == 0)
00731 {
00732 if (listen_thread_ && listen_thread_->joinable()) listen_thread_->join();
00733 listen_thread_refcount_ = 1;
00734 TLOG(TLVL_INFO) << GetTraceName() << ": Starting Listener Thread";
00735 listen_thread_ = std::make_unique<boost::thread>(&TCPSocketTransfer::listen_, this);
00736 }
00737 else
00738 {
00739 listen_thread_refcount_++;
00740 }
00741 }
00742
00743 void artdaq::TCPSocketTransfer::listen_()
00744 {
00745 int listen_fd = -1;
00746 while (listen_thread_refcount_ > 0)
00747 {
00748 TLOG(TLVL_TRACE) << "listen_: Listening/accepting new connections";
00749 if (listen_fd == -1)
00750 {
00751 TLOG(TLVL_DEBUG) << "listen_: Opening listener";
00752 listen_fd = TCP_listen_fd(calculate_port_(), rcvbuf_);
00753 }
00754 if (listen_fd == -1)
00755 {
00756 TLOG(TLVL_DEBUG) << "listen_: Error creating listen_fd!";
00757 break;
00758 }
00759
00760 int res;
00761 timeval tv = { 2,0 };
00762 fd_set rfds;
00763 FD_ZERO(&rfds);
00764 FD_SET(listen_fd, &rfds);
00765
00766 res = select(listen_fd + 1, &rfds, (fd_set *)0, (fd_set *)0, &tv);
00767 if (res > 0)
00768 {
00769 int sts;
00770 sockaddr_un un;
00771 socklen_t arglen = sizeof(un);
00772 int fd;
00773 TLOG(TLVL_DEBUG) << "listen_: Calling accept";
00774 fd = accept(listen_fd, (sockaddr *)&un, &arglen);
00775 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done with accept";
00776
00777 TLOG(TLVL_DEBUG) << "listen_: Reading connect message";
00778 socklen_t lenlen = sizeof(tv);
00779
00780 setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, lenlen);
00781 MessHead mh;
00782 uint64_t mark_us = TimeUtils::gettimeofday_us();
00783 sts = read(fd, &mh, sizeof(mh));
00784 uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us;
00785 TLOG(TLVL_DEBUG) << "listen_: Read of connect message took " << delta_us << " microseconds.";
00786 if (sts != sizeof(mh))
00787 {
00788 TLOG(TLVL_DEBUG) << "listen_: Wrong message header length received!";
00789 close(fd);
00790 continue;
00791 }
00792
00793
00794 mh.source_id = ntohs(mh.source_id);
00795 if (ntohl(mh.conn_magic) != CONN_MAGIC || !(mh.message_type == MessHead::connect_v0))
00796 {
00797 TLOG(TLVL_DEBUG) << "listen_: Wrong magic bytes in header!";
00798 close(fd);
00799 continue;
00800 }
00801
00802
00803 connected_fds_[mh.source_id].insert(fd);
00804
00805 TLOG(TLVL_INFO) << "listen_: New fd is " << fd << " for source rank " << mh.source_id;
00806 }
00807 else
00808 {
00809 TLOG(16) << "listen_: No connections in timeout interval!";
00810 }
00811 }
00812
00813 TLOG(TLVL_INFO) << "listen_: Shutting down connection listener";
00814 if (listen_fd != -1) close(listen_fd);
00815 for (auto& rank : connected_fds_)
00816 {
00817 for (auto& fd : rank.second)
00818 {
00819 close(fd);
00820 }
00821 }
00822 connected_fds_.clear();
00823
00824 }
00825
// Plugin hook for this class. NOTE(review): presumably expands to the factory/registration
// code that lets the transfer plugin loader create TCPSocketTransfer -- confirm against
// the macro definition in TransferInterface.hh.
DEFINE_ARTDAQ_TRANSFER(artdaq::TCPSocketTransfer)