00001
00002
00003
00004
00005
00006
00007
00008 #include <stdlib.h>
00009 #include <sys/socket.h>
00010 #include <sys/un.h>
00011 #include <arpa/inet.h>
00012 #include <sys/types.h>
00013 #include <poll.h>
00014
00015
00016 #include <string>
00017 #include <fstream>
00018 #include <stdexcept>
00019
00020
00021 #define TRACE_NAME "TCPSocketTransfer"
00022 #include "artdaq/DAQdata/Globals.hh"
00023
00024
00025 #include "artdaq/TransferPlugins/TCPSocketTransfer.hh"
00026 #include "artdaq/DAQdata/TCP_listen_fd.hh"
00027 #include "artdaq/DAQdata/TCPConnect.hh"
00028 #include "artdaq/TransferPlugins/detail/Timeout.hh"
00029 #include "artdaq/TransferPlugins/detail/SRSockets.hh"
00030 #include "artdaq-core/Data/Fragment.hh"
00031 #include "artdaq-core/Utilities/TimeUtils.hh"
00032 #include <iomanip>
00033
00034 std::atomic<int> artdaq::TCPSocketTransfer::listen_thread_refcount_(0);
00035 std::unique_ptr<boost::thread> artdaq::TCPSocketTransfer::listen_thread_ = nullptr;
00036 std::map<int, std::set<int>> artdaq::TCPSocketTransfer::connected_fds_ = std::map<int, std::set<int>>();
00037 std::mutex artdaq::TCPSocketTransfer::listen_thread_mutex_;
00038 std::mutex artdaq::TCPSocketTransfer::connected_fd_mutex_;
00039
00040 artdaq::TCPSocketTransfer::
00041 TCPSocketTransfer(fhicl::ParameterSet const& pset, TransferInterface::Role role)
00042 : TransferInterface(pset, role)
00043 , send_fd_(-1)
00044 , active_receive_fd_(-1)
00045 , last_active_receive_fd_(-1)
00046 , rcvbuf_(pset.get<size_t>("tcp_receive_buffer_size", 0))
00047 , sndbuf_(max_fragment_size_words_ * sizeof(artdaq::RawDataType) * buffer_count_)
00048 , send_retry_timeout_us_(pset.get<size_t>("send_retry_timeout_us", 1000000))
00049 , stats_connect_stop_(false)
00050 , stats_connect_thread_(std::bind(&TCPSocketTransfer::stats_connect_, this))
00051 , timeoutMessageArmed_(true)
00052 , not_connected_count_(0)
00053 , receive_err_threshold_(pset.get<size_t>("receive_socket_disconnected_max_count", 1000))
00054 , receive_err_wait_us_(pset.get<size_t>("receive_socket_disconnected_wait_us", 10000))
00055 {
00056 TLOG(TLVL_DEBUG) << GetTraceName() << " Constructor: pset=" << pset.to_string() << ", role=" << (role == TransferInterface::Role::kReceive ? "kReceive" : "kSend");
00057 auto masterPortOffset = pset.get<int>("offset_all_ports", 0);
00058 hostMap_ = MakeHostMap(pset, masterPortOffset);
00059
00060 std::function<void()> function = std::bind(&TCPSocketTransfer::reconnect_, this);
00061 tmo_.add_periodic("reconnect", NULL, function, 200);
00062
00063 if (role == TransferInterface::Role::kReceive)
00064 {
00065
00066 TLOG(TLVL_DEBUG) << GetTraceName() << ": Listening for connections";
00067 start_listen_thread_();
00068 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Listening";
00069 }
00070 else
00071 {
00072 TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting to destination";
00073 connect_();
00074 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Connecting";
00075 }
00076 TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Constructor";
00077 }
00078
00079 artdaq::TCPSocketTransfer::~TCPSocketTransfer() noexcept
00080 {
00081 TLOG(TLVL_DEBUG) << GetTraceName() << ": Shutting down TCPSocketTransfer";
00082 stats_connect_stop_ = true;
00083 stopstatscv_.notify_all();
00084 stats_connect_thread_.join();
00085
00086 if (role() == TransferInterface::Role::kSend)
00087 {
00088
00089 MessHead mh = { 0,MessHead::stop_v0,htons(TransferInterface::source_rank()),{0} };
00090 if (send_fd_ != -1)
00091 {
00092
00093 timeval tv = { 0,100000 };
00094 socklen_t len = sizeof(tv);
00095 setsockopt(send_fd_, SOL_SOCKET, SO_SNDTIMEO, &tv, len);
00096 write(send_fd_, &mh, sizeof(mh));
00097 }
00098 close(send_fd_);
00099 }
00100 else
00101 {
00102 {
00103 std::unique_lock<std::mutex> fd_lock(connected_fd_mutex_);
00104 auto it = connected_fds_[source_rank()].begin();
00105 while (it != connected_fds_[source_rank()].end())
00106 {
00107 close(*it);
00108 it = connected_fds_[source_rank()].erase(it);
00109 }
00110 connected_fds_.erase(source_rank());
00111 }
00112
00113 std::unique_lock<std::mutex> lk(listen_thread_mutex_);
00114 listen_thread_refcount_--;
00115 if (listen_thread_refcount_ == 0 && listen_thread_ && listen_thread_->joinable())
00116 {
00117 listen_thread_->join();
00118 }
00119 }
00120 TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Destructor";
00121 }
00122
00123 int artdaq::TCPSocketTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t timeout_usec)
00124 {
00125 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: BEGIN";
00126 int ret_rank = RECV_TIMEOUT;
00127
00128 if (getConnectedFDCount(source_rank()) == 0)
00129 {
00130 if (++not_connected_count_ > receive_err_threshold_) { return DATA_END; }
00131 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Receive socket not connected, returning RECV_TIMEOUT";
00132 usleep(receive_err_wait_us_);
00133 return RECV_TIMEOUT;
00134 }
00135 not_connected_count_ = 0;
00136
00137 TLOG(5) << GetTraceName() << ": receiveFragmentHeader timeout_usec=" << std::to_string(timeout_usec);
00138
00139 size_t byte_cnt = 0;
00140 int sts;
00141 int offset = 0;
00142 SocketState state = SocketState::Metadata;
00143 int target_bytes = sizeof(MessHead);
00144 uint64_t start_time_us = TimeUtils::gettimeofday_us();
00145
00146
00147
00148
00149
00150
00151
00152
00153 uint8_t* buff;
00154
00155 int timeout_ms;
00156 if (timeout_usec == 0)
00157 timeout_ms = 0;
00158 else
00159 timeout_ms = (timeout_usec + 999) / 1000;
00160
00161 bool done = false;
00162 while (!done && getConnectedFDCount(source_rank()) > 0)
00163 {
00164 if (active_receive_fd_ == -1)
00165 {
00166 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00167 size_t fd_count = connected_fds_[source_rank()].size();
00168 auto iter = connected_fds_[source_rank()].begin();
00169 std::vector<pollfd> pollfds(fd_count);
00170 for (size_t ii = 0; ii < fd_count; ++ii)
00171 {
00172 pollfds[ii].events = POLLIN | POLLERR;
00173 pollfds[ii].fd = *iter;
00174 ++iter;
00175 }
00176
00177
00178 int num_fds_ready = poll(&pollfds[0], fd_count, timeout_ms);
00179 if (num_fds_ready <= 0)
00180 {
00181 if (num_fds_ready == 0 && timeout_ms > 0)
00182 {
00183 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: No data on receive socket, returning RECV_TIMEOUT";
00184 return RECV_TIMEOUT;
00185 }
00186 break;
00187 }
00188
00189 size_t index = 0;
00190 if (last_active_receive_fd_ != -1)
00191 {
00192 for (auto& pollfd : pollfds)
00193 {
00194 index++;
00195 if (pollfd.fd == last_active_receive_fd_)
00196 {
00197 break;
00198 }
00199 }
00200 }
00201
00202 int active_index = -1;
00203 short anomolous_events = 0;
00204 for (size_t ii = index; ii < index + pollfds.size(); ++ii)
00205 {
00206 if (pollfds[index % pollfds.size()].revents & (POLLIN | POLLPRI | POLLHUP | POLLERR))
00207 {
00208 active_index = index % pollfds.size();
00209 active_receive_fd_ = pollfds[active_index].fd;
00210 break;
00211 }
00212 else if (pollfds[index % pollfds.size()].revents & (POLLNVAL))
00213 {
00214 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: FD is closed, most likely because the peer went away. Removing from fd list.";
00215 close(pollfds[index].fd);
00216 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00217 connected_fds_[source_rank()].erase(pollfds[index].fd);
00218 continue;
00219 }
00220 else if (pollfds[index % pollfds.size()].revents)
00221 {
00222 anomolous_events |= pollfds[index % pollfds.size()].revents;
00223 }
00224 }
00225
00226 if (active_index == -1)
00227 {
00228 if (anomolous_events)
00229 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Wrong event received from a pollfd. Mask: " << static_cast<int>(anomolous_events);
00230 active_receive_fd_ = -1;
00231 continue;
00232 }
00233
00234 if (!done && timeout_usec > 0)
00235 {
00236
00237 size_t delta_us = TimeUtils::gettimeofday_us() - start_time_us;
00238 if (delta_us > timeout_usec)
00239 {
00240 return RECV_TIMEOUT;
00241 }
00242 timeout_ms = ((timeout_usec - delta_us) + 999) / 1000;
00243 }
00244 }
00245
00246 if (state == SocketState::Metadata)
00247 {
00248
00249 buff = &(mha[offset]);
00250 byte_cnt = sizeof(MessHead) - offset;
00251 }
00252 else
00253 {
00254
00255 buff = reinterpret_cast<uint8_t*>(&header) + offset;
00256 byte_cnt = mh.byte_count - offset;
00257 }
00258
00259 if (byte_cnt > 0)
00260 {
00261 TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Reading " << byte_cnt << " bytes from socket";
00262 sts = read(active_receive_fd_, buff, byte_cnt);
00263 TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Done with read";
00264 }
00265
00266 TLOG(7) << GetTraceName() << ": receiveFragmentHeader state=" << static_cast<int>(state) << " read=" << sts;
00267 if (sts < 0)
00268 {
00269 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: Error on receive, closing socket " << " (errno=" << errno << ": " << strerror(errno) << ")";
00270 close(active_receive_fd_);
00271 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00272 connected_fds_[source_rank()].erase(active_receive_fd_);
00273 active_receive_fd_ = -1;
00274 }
00275 else
00276 {
00277
00278 sts = offset += sts;
00279 if (sts >= target_bytes)
00280 {
00281 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Target read bytes reached. Changing state";
00282 offset = 0;
00283 if (state == SocketState::Metadata)
00284 {
00285 state = SocketState::Data;
00286 mh.byte_count = ntohl(mh.byte_count);
00287 mh.source_id = ntohs(mh.source_id);
00288 target_bytes = mh.byte_count;
00289
00290 if (mh.message_type == MessHead::stop_v0)
00291 {
00292 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Stop Message received. Closing socket " << active_receive_fd_;
00293 close(active_receive_fd_);
00294 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00295 connected_fds_[source_rank()].erase(active_receive_fd_);
00296 active_receive_fd_ = -1;
00297 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: There are now " << connected_fds_[source_rank()].size() << " active senders.";
00298 }
00299
00300 if (target_bytes == 0)
00301 {
00302
00303 return RECV_TIMEOUT;
00304 }
00305 }
00306 else
00307 {
00308 ret_rank = source_rank();
00309 TLOG(8) << GetTraceName() << ": receiveFragmentHeader done sts=" << sts << " src=" << ret_rank;
00310 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Done receiving fragment header. Moving into output.";
00311
00312 done = true;
00313 break;
00314 }
00315 }
00316 }
00317
00318 }
00319
00320 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: Returning " << ret_rank;
00321 return ret_rank;
00322 }
00323
00324 int artdaq::TCPSocketTransfer::receiveFragmentData(RawDataType* destination, size_t)
00325 {
00326 TLOG(9) << GetTraceName() << ": receiveFragmentData: BEGIN";
00327 int ret_rank = RECV_TIMEOUT;
00328 if (active_receive_fd_ == -1)
00329 {
00330 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Receive socket not connected, returning RECV_TIMEOUT";
00331 return RECV_TIMEOUT;
00332 }
00333
00334
00335 uint8_t* buff;
00336 size_t byte_cnt = 0;
00337 int sts;
00338 int offset = 0;
00339 SocketState state = SocketState::Metadata;
00340 int target_bytes = sizeof(MessHead);
00341
00342 pollfd pollfd_s;
00343 pollfd_s.events = POLLIN | POLLPRI | POLLERR;
00344 pollfd_s.fd = active_receive_fd_;
00345
00346 bool done = false;
00347 while (!done)
00348 {
00349 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Polling fd to see if there's data";
00350 int num_fds_ready = poll(&pollfd_s, 1, 1000);
00351 if (num_fds_ready <= 0)
00352 {
00353 if (num_fds_ready == 0)
00354 {
00355 TLOG(9) << GetTraceName() << ": receiveFragmentData: No data on receive socket, returning RECV_TIMEOUT";
00356 active_receive_fd_ = -1;
00357 return RECV_TIMEOUT;
00358 }
00359
00360 TLOG(TLVL_ERROR) << "Error in poll: errno=" << errno;
00361 active_receive_fd_ = -1;
00362 break;
00363 }
00364
00365 if (pollfd_s.revents & (POLLNVAL))
00366 {
00367 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: FD is closed, most likely because the peer went away. Removing from fd list.";
00368 close(active_receive_fd_);
00369 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00370 connected_fds_[source_rank()].erase(active_receive_fd_);
00371 active_receive_fd_ = -1;
00372 break;
00373 }
00374 else if (!(pollfd_s.revents & (POLLIN | POLLPRI | POLLERR)))
00375 {
00376 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Wrong event received from pollfd: " << pollfd_s.revents;
00377 close(active_receive_fd_);
00378 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00379 connected_fds_[source_rank()].erase(active_receive_fd_);
00380 continue;
00381 }
00382
00383 if (state == SocketState::Metadata)
00384 {
00385
00386 buff = &(mha[offset]);
00387 byte_cnt = sizeof(MessHead) - offset;
00388 }
00389 else
00390 {
00391
00392 buff = reinterpret_cast<uint8_t*>(destination) + offset;
00393 byte_cnt = mh.byte_count - offset;
00394 }
00395
00396
00397 sts = read(active_receive_fd_, buff, byte_cnt);
00398
00399
00400 TLOG(10) << GetTraceName() << ": recvFragment state=" << static_cast<int>(state) << " read=" << sts;
00401 if (sts < 0)
00402 {
00403 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Error on receive, closing socket"
00404 << " (errno=" << errno << ": " << strerror(errno) << ")";
00405 close(active_receive_fd_);
00406 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00407 connected_fds_[source_rank()].erase(active_receive_fd_);
00408 active_receive_fd_ = -1;
00409 }
00410 else
00411 {
00412
00413 sts = offset += sts;
00414 if (sts >= target_bytes)
00415 {
00416 TLOG(9) << GetTraceName() << ": receiveFragmentData: Target read bytes reached. Changing state";
00417 offset = 0;
00418 if (state == SocketState::Metadata)
00419 {
00420 state = SocketState::Data;
00421 mh.byte_count = ntohl(mh.byte_count);
00422 mh.source_id = ntohs(mh.source_id);
00423 target_bytes = mh.byte_count;
00424 }
00425 else
00426 {
00427 ret_rank = source_rank();
00428 TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
00429 TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
00430
00431 done = true;
00432 break;
00433 }
00434 }
00435 }
00436
00437
00438 if (target_bytes == 0 && state == SocketState::Data)
00439 {
00440 ret_rank = source_rank();
00441 TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank;
00442 TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output.";
00443
00444 done = true;
00445 }
00446
00447 }
00448
00449 last_active_receive_fd_ = active_receive_fd_;
00450 active_receive_fd_ = -1;
00451
00452 TLOG(9) << GetTraceName() << ": receiveFragmentData: Returning " << ret_rank;
00453 return ret_rank;
00454 }
00455
00456 bool artdaq::TCPSocketTransfer::isRunning()
00457 {
00458 switch (role())
00459 {
00460 case TransferInterface::Role::kSend:
00461 return send_fd_ != -1;
00462 case TransferInterface::Role::kReceive:
00463 TLOG(TLVL_DEBUG) << GetTraceName() << ": isRunning: There are " << getConnectedFDCount(source_rank()) << " fds connected.";
00464 return getConnectedFDCount(source_rank()) > 0;
00465 }
00466 return false;
00467 }
00468
00469
00470
00471 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendFragment_(Fragment&& frag, size_t send_timeout_usec)
00472 {
00473 TLOG(12) << GetTraceName() << ": sendFragment begin";
00474 artdaq::Fragment grab_ownership_frag = std::move(frag);
00475
00476
00477
00478 iovec iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress()),
00479 detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
00480
00481 auto sts = sendData_(&iov, 1, send_retry_timeout_us_);
00482 auto start_time = std::chrono::steady_clock::now();
00483
00484 while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
00485 {
00486 TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
00487 sts = sendData_(&iov, 1, send_retry_timeout_us_);
00488 usleep(1000);
00489 }
00490 if (sts != CopyStatus::kSuccess) return sts;
00491
00492
00493
00494 iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress() + detail::RawFragmentHeader::num_words()),
00495 grab_ownership_frag.sizeBytes() - detail::RawFragmentHeader::num_words() * sizeof(RawDataType) };
00496 sts = sendData_(&iov, 1, send_retry_timeout_us_);
00497 start_time = std::chrono::steady_clock::now();
00498 while (sts != CopyStatus::kSuccess && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000)
00499 {
00500 TLOG(13) << GetTraceName() << ": sendFragment: Timeout or Error sending fragment";
00501 sts = sendData_(&iov, 1, send_retry_timeout_us_);
00502 usleep(1000);
00503 }
00504
00505 TLOG(12) << GetTraceName() << ": sendFragment returning kSuccess";
00506 return sts;
00507 }
00508
00509 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const void* buf, size_t bytes, size_t send_timeout_usec)
00510 {
00511 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_ Converting buf to iovec";
00512 iovec iov = { (void*)buf, bytes };
00513 return sendData_(&iov, 1, send_timeout_usec);
00514 }
00515
00516 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const struct iovec* iov, int iovcnt, size_t send_timeout_usec)
00517 {
00518
00519 if (send_fd_ == -1)
00520 {
00521 if (timeoutMessageArmed_)
00522 {
00523 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Send fd is not open. Returning kTimeout";
00524 timeoutMessageArmed_ = false;
00525 }
00526 return CopyStatus::kTimeout;
00527 }
00528 timeoutMessageArmed_ = true;
00529 TLOG(14) << GetTraceName() << ": send_timeout_usec is " << std::to_string(send_timeout_usec) << ", currently unused.";
00530
00531
00532 uint32_t total_to_write_bytes = 0;
00533 std::vector<iovec> iov_in(iovcnt + 1);
00534 std::vector<iovec> iovv(iovcnt + 2);
00535 int ii;
00536 for (ii = 0; ii < iovcnt; ++ii)
00537 {
00538 iov_in[ii + 1] = iov[ii];
00539 total_to_write_bytes += iov[ii].iov_len;
00540 }
00541
00542 MessHead mh = { 0,MessHead::data_v0,htons(source_rank()),{htonl(total_to_write_bytes)} };
00543 iov_in[0].iov_base = &mh;
00544 iov_in[0].iov_len = sizeof(mh);
00545 total_to_write_bytes += sizeof(mh);
00546
00547 ssize_t sts = 0;
00548 ssize_t total_written_bytes = 0;
00549 ssize_t per_write_max_bytes = (32 * 1024);
00550
00551 size_t in_iov_idx = 0;
00552 size_t out_iov_idx = 0;
00553 ssize_t this_write_bytes = 0;
00554
00555 do
00556 {
00557
00558
00559 for (;
00560 (in_iov_idx + out_iov_idx) < iov_in.size() && this_write_bytes < per_write_max_bytes;
00561 ++out_iov_idx)
00562 {
00563 this_write_bytes += iov_in[in_iov_idx + out_iov_idx].iov_len;
00564 iovv[out_iov_idx] = iov_in[in_iov_idx + out_iov_idx];
00565 }
00566 if (this_write_bytes > per_write_max_bytes)
00567 {
00568 iovv[out_iov_idx - 1].iov_len -= this_write_bytes - per_write_max_bytes;
00569 this_write_bytes = per_write_max_bytes;
00570 }
00571
00572
00573 do_again:
00574 TLOG(14) << GetTraceName() << ": sendFragment b4 writev " << std::setw(7) << std::to_string(total_written_bytes) << " total_written_bytes send_fd_=" << send_fd_ << " in_idx=" << std::to_string(in_iov_idx)
00575 << " iovcnt=" << std::to_string(out_iov_idx) << " 1st.len=" << std::to_string(iovv[0].iov_len);
00576
00577 sts = writev(send_fd_, &(iovv[0]), out_iov_idx);
00578
00579
00580 if (sts == -1)
00581 {
00582 if (errno == EAGAIN )
00583 {
00584 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment EWOULDBLOCK";
00585 fcntl(send_fd_, F_SETFL, 0);
00586 blocking = true;
00587
00588 goto do_again;
00589 }
00590 TLOG(TLVL_WARNING) << GetTraceName() << ": sendFragment_: WRITE ERROR: " << strerror(errno);
00591 connect_state = 0;
00592 close(send_fd_);
00593 send_fd_ = -1;
00594 return TransferInterface::CopyStatus::kErrorNotRequiringException;
00595 }
00596 else if (sts != this_write_bytes)
00597 {
00598
00599 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment writev sts(" << std::to_string(sts) << ")!=requested_send_bytes(" << std::to_string(this_write_bytes) << ")";
00600 total_written_bytes += sts;
00601
00602 for (ii = 0; (size_t)sts >= iovv[ii].iov_len; ++ii)
00603 sts -= iovv[ii].iov_len;
00604 in_iov_idx += ii;
00605 iovv[ii].iov_len -= sts;
00606 iovv[ii].iov_base = (uint8_t*)(iovv[ii].iov_base) + sts;
00607
00608
00609 out_iov_idx = 0;
00610 if (ii != 0)
00611 iovv[out_iov_idx] = iovv[ii];
00612
00613 this_write_bytes = iovv[out_iov_idx].iov_len;
00614
00615
00616
00617
00618 unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
00619 - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
00620 if (additional)
00621 {
00622 iovv[out_iov_idx].iov_len += additional;
00623 this_write_bytes += additional;
00624 if (this_write_bytes > per_write_max_bytes)
00625 {
00626 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
00627 this_write_bytes = per_write_max_bytes;
00628 }
00629 }
00630 ++out_iov_idx;
00631 TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts!=: this_write_bytes=" << std::to_string(this_write_bytes)
00632 << " out_iov_idx=" << std::to_string(out_iov_idx)
00633 << " additional=" << std::to_string(additional)
00634 << " ii=" << ii;
00635 }
00636 else
00637 {
00638 TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts(" << std::to_string(sts) << ")==requested_send_bytes(" << std::to_string(this_write_bytes) << ")";
00639 total_written_bytes += sts;
00640 --out_iov_idx;
00641 iovv[out_iov_idx].iov_base = (uint8_t*)(iovv[out_iov_idx].iov_base) + iovv[out_iov_idx].iov_len;
00642 iovv[out_iov_idx].iov_len = 0;
00643 in_iov_idx += out_iov_idx;
00644 this_write_bytes = 0;
00645
00646 unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len)
00647 - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len);
00648 if (additional)
00649 {
00650 iovv[out_iov_idx].iov_len += additional;
00651 this_write_bytes += additional;
00652 if (this_write_bytes > per_write_max_bytes)
00653 {
00654 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes;
00655 this_write_bytes = per_write_max_bytes;
00656 }
00657 if (out_iov_idx != 0)
00658 iovv[0] = iovv[out_iov_idx];
00659 out_iov_idx = 1;
00660 }
00661 else
00662 {
00663 ++in_iov_idx;
00664 out_iov_idx = 0;
00665 }
00666 }
00667 } while (total_written_bytes < total_to_write_bytes);
00668 if (total_written_bytes > total_to_write_bytes)
00669 TLOG(TLVL_ERROR) << GetTraceName() << ": sendFragment program error: too many bytes transferred";
00670
00671 if (blocking)
00672 {
00673 blocking = false;
00674 fcntl(send_fd_, F_SETFL, 0);
00675 }
00676 sts = total_written_bytes - sizeof(MessHead);
00677
00678 TLOG(14) << GetTraceName() << ": sendFragment sts=" << std::to_string(sts);
00679 return TransferInterface::CopyStatus::kSuccess;
00680 }
00681
00682
00683
00684 void artdaq::TCPSocketTransfer::stats_connect_()
00685 {
00686 std::cv_status sts;
00687 while (!stats_connect_stop_)
00688 {
00689 std::string desc;
00690 void* tag;
00691 std::function<void()> function;
00692 uint64_t ts_us;
00693
00694 int msdly = tmo_.get_next_timeout_msdly();
00695
00696 if (msdly <= 0)
00697 msdly = 2000;
00698
00699 std::unique_lock<std::mutex> lck(stopstatscvm_);
00700 sts = stopstatscv_.wait_until(lck
00701 , std::chrono::system_clock::now()
00702 + std::chrono::milliseconds(msdly));
00703 TLOG(15) << GetTraceName() << ": thread1 after wait_until(msdly=" << msdly << ") - sts=" << static_cast<int>(sts);
00704
00705 if (sts == std::cv_status::no_timeout)
00706 break;
00707
00708 auto sts = tmo_.get_next_expired_timeout(desc, &tag, function, &ts_us);
00709
00710 while (sts != -1 && desc != "")
00711 {
00712 if (function != NULL)
00713 function();
00714
00715 sts = tmo_.get_next_expired_timeout(desc, &tag, function, &ts_us);
00716 }
00717 }
00718 }
00719
00720 void artdaq::TCPSocketTransfer::connect_()
00721 {
00722 TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting sender socket";
00723 int sndbuf_bytes = static_cast<int>(sndbuf_);
00724 send_fd_ = TCPConnect(hostMap_[destination_rank()].hostname.c_str()
00725 , calculate_port_()
00726 , O_NONBLOCK
00727 , sndbuf_bytes);
00728 connect_state = 0;
00729 blocking = 0;
00730 TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_ " + hostMap_[destination_rank()].hostname + ":" << calculate_port_() << " send_fd_=" << send_fd_;
00731 if (send_fd_ != -1)
00732 {
00733
00734 TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_: Writing connect message";
00735 MessHead mh = { 0,MessHead::connect_v0,htons(source_rank()),{htonl(CONN_MAGIC)} };
00736 ssize_t sts = write(send_fd_, &mh, sizeof(mh));
00737 if (sts == -1)
00738 {
00739 TLOG(TLVL_ERROR) << GetTraceName() << ": connect_: Error writing connect message!";
00740
00741 connect_state = 0;
00742 close(send_fd_);
00743 send_fd_ = -1;
00744 }
00745 else
00746 {
00747 TLOG(TLVL_INFO) << GetTraceName() << ": connect_: Successfully connected";
00748
00749 connect_state = 1;
00750 }
00751 }
00752 }
00753
00754 void artdaq::TCPSocketTransfer::reconnect_()
00755 {
00756 TLOG(TLVL_TRACE) << GetTraceName() << ": check/reconnect";
00757 if (send_fd_ == -1 && role() == TransferInterface::Role::kSend) return connect_();
00758 }
00759
00760 void artdaq::TCPSocketTransfer::start_listen_thread_()
00761 {
00762 std::unique_lock<std::mutex> start_lock(listen_thread_mutex_);
00763 if (listen_thread_refcount_ == 0)
00764 {
00765 if (listen_thread_ && listen_thread_->joinable()) listen_thread_->join();
00766 listen_thread_refcount_ = 1;
00767 TLOG(TLVL_INFO) << GetTraceName() << ": Starting Listener Thread";
00768 listen_thread_ = std::make_unique<boost::thread>(&TCPSocketTransfer::listen_, calculate_port_(), rcvbuf_);
00769 }
00770 else
00771 {
00772 listen_thread_refcount_++;
00773 }
00774 }
00775
00776 void artdaq::TCPSocketTransfer::listen_(int port, size_t rcvbuf)
00777 {
00778 int listen_fd = -1;
00779 while (listen_thread_refcount_ > 0)
00780 {
00781 TLOG(TLVL_TRACE) << "listen_: Listening/accepting new connections";
00782 if (listen_fd == -1)
00783 {
00784 TLOG(TLVL_DEBUG) << "listen_: Opening listener";
00785 listen_fd = TCP_listen_fd(port, rcvbuf);
00786 }
00787 if (listen_fd == -1)
00788 {
00789 TLOG(TLVL_DEBUG) << "listen_: Error creating listen_fd!";
00790 break;
00791 }
00792
00793 int res;
00794 timeval tv = { 2,0 };
00795 fd_set rfds;
00796 FD_ZERO(&rfds);
00797 FD_SET(listen_fd, &rfds);
00798
00799 res = select(listen_fd + 1, &rfds, (fd_set *)0, (fd_set *)0, &tv);
00800 if (res > 0)
00801 {
00802 int sts;
00803 sockaddr_un un;
00804 socklen_t arglen = sizeof(un);
00805 int fd;
00806 TLOG(TLVL_DEBUG) << "listen_: Calling accept";
00807 fd = accept(listen_fd, (sockaddr *)&un, &arglen);
00808 TLOG(TLVL_DEBUG) << "listen_: Done with accept";
00809
00810 TLOG(TLVL_DEBUG) << "listen_: Reading connect message";
00811 socklen_t lenlen = sizeof(tv);
00812
00813 setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, lenlen);
00814 MessHead mh;
00815 uint64_t mark_us = TimeUtils::gettimeofday_us();
00816 sts = read(fd, &mh, sizeof(mh));
00817 uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us;
00818 TLOG(TLVL_DEBUG) << "listen_: Read of connect message took " << delta_us << " microseconds.";
00819 if (sts != sizeof(mh))
00820 {
00821 TLOG(TLVL_DEBUG) << "listen_: Wrong message header length received!";
00822 close(fd);
00823 continue;
00824 }
00825
00826
00827 mh.source_id = ntohs(mh.source_id);
00828 if (ntohl(mh.conn_magic) != CONN_MAGIC || !(mh.message_type == MessHead::connect_v0))
00829 {
00830 TLOG(TLVL_DEBUG) << "listen_: Wrong magic bytes in header!";
00831 close(fd);
00832 continue;
00833 }
00834
00835
00836 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00837 connected_fds_[mh.source_id].insert(fd);
00838
00839 TLOG(TLVL_INFO) << "listen_: New fd is " << fd << " for source rank " << mh.source_id;
00840 }
00841 else
00842 {
00843 TLOG(16) << "listen_: No connections in timeout interval!";
00844 }
00845 }
00846
00847 TLOG(TLVL_INFO) << "listen_: Shutting down connection listener";
00848 if (listen_fd != -1) close(listen_fd);
00849 std::unique_lock<std::mutex> lk(connected_fd_mutex_);
00850 auto it = connected_fds_.begin();
00851 while (it != connected_fds_.end())
00852 {
00853 auto& fd_set = it->second;
00854 auto rank_it = fd_set.begin();
00855 while (rank_it != fd_set.end())
00856 {
00857 close(*rank_it);
00858 rank_it = fd_set.erase(rank_it);
00859 }
00860 it = connected_fds_.erase(it);
00861 }
00862
00863 }
00864
00865 DEFINE_ARTDAQ_TRANSFER(artdaq::TCPSocketTransfer) // NOTE(review): presumably expands to the plugin registration/factory hook for this transfer class - confirm against the macro definition