$treeview $search $mathjax $extrastylesheet
artdaq
v3_04_01
$projectbrief
|
$projectbrief
|
$searchbox |
00001 // Sep 14, 2016. "TERMS AND CONDITIONS" governing this file are in the README 00002 // or COPYING file. If you do not have such a file, one can be obtained by 00003 // contacting Ron or Fermi Lab in Batavia IL, 60510, phone: 630-840-3000. 00004 // $RCSfile: .emacs.gnu,v $ 00005 // rev="$Revision: 1.30 $$Date: 2016/03/01 14:27:27 $"; 00006 00007 // C Includes 00008 #include <stdlib.h> // atoi, strtoul 00009 #include <sys/socket.h> // socket, socklen_t 00010 #include <sys/un.h> // sockaddr_un 00011 #include <arpa/inet.h> // ntohl, ntohs 00012 #include <sys/types.h> // size_t 00013 #include <poll.h> // struct pollfd 00014 00015 // C++ Includes 00016 #include <string> 00017 #include <fstream> 00018 #include <stdexcept> 00019 00020 // product Includes 00021 #define TRACE_NAME (app_name + "_TCPSocketTransfer").c_str() 00022 #include "artdaq/DAQdata/Globals.hh" 00023 00024 // artdaq Includes 00025 #include "artdaq/TransferPlugins/TCPSocketTransfer.hh" 00026 #include "artdaq/DAQdata/TCP_listen_fd.hh" 00027 #include "artdaq/DAQdata/TCPConnect.hh" 00028 #include "artdaq/TransferPlugins/detail/Timeout.hh" 00029 #include "artdaq/TransferPlugins/detail/SRSockets.hh" 00030 #include "artdaq-core/Data/Fragment.hh" 00031 #include "artdaq-core/Utilities/TimeUtils.hh" 00032 #include <iomanip> 00033 00034 std::atomic<int> artdaq::TCPSocketTransfer::listen_thread_refcount_(0); 00035 std::unique_ptr<boost::thread> artdaq::TCPSocketTransfer::listen_thread_ = nullptr; 00036 std::map<int, std::set<int>> artdaq::TCPSocketTransfer::connected_fds_ = std::map<int, std::set<int>>(); 00037 std::mutex artdaq::TCPSocketTransfer::listen_thread_mutex_; 00038 std::mutex artdaq::TCPSocketTransfer::connected_fd_mutex_; 00039 00040 artdaq::TCPSocketTransfer:: 00041 TCPSocketTransfer(fhicl::ParameterSet const& pset, TransferInterface::Role role) 00042 : TransferInterface(pset, role) 00043 , send_fd_(-1) 00044 , active_receive_fd_(-1) 00045 , last_active_receive_fd_(-1) 00046 , rcvbuf_(pset.get<size_t>("tcp_receive_buffer_size", 0)) 00047 , sndbuf_(pset.get<size_t>("tcp_send_buffer_size", max_fragment_size_words_ * sizeof(artdaq::RawDataType) * buffer_count_)) 00048 , send_retry_timeout_us_(pset.get<size_t>("send_retry_timeout_us", 1000000)) 00049 , timeoutMessageArmed_(true) 00050 , last_recv_time_() 00051 , receive_disconnected_wait_s_(pset.get<double>("receive_socket_disconnected_wait_s", 10.0)) 00052 , receive_err_wait_us_(pset.get<size_t>("receive_socket_disconnected_wait_us", 10000)) 00053 , receive_socket_has_been_connected_(false) 00054 , send_ack_diff_(0) 00055 { 00056 TLOG(TLVL_DEBUG) << GetTraceName() << " Constructor: pset=" << pset.to_string() << ", role=" << (role == TransferInterface::Role::kReceive ? "kReceive" : "kSend"); 00057 00058 if (role == TransferInterface::Role::kReceive) 00059 { 00060 // Wait for sender to connect... 00061 TLOG(TLVL_DEBUG) << GetTraceName() << ": Listening for connections"; 00062 start_listen_thread_(); 00063 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Listening"; 00064 } 00065 else 00066 { 00067 hostMap_ = MakeHostMap(pset); 00068 TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting to destination"; 00069 connect_(); 00070 TLOG(TLVL_DEBUG) << GetTraceName() << ": Done Connecting"; 00071 } 00072 TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Constructor"; 00073 } 00074 00075 artdaq::TCPSocketTransfer::~TCPSocketTransfer() noexcept 00076 { 00077 TLOG(TLVL_DEBUG) << GetTraceName() << ": Shutting down TCPSocketTransfer"; 00078 00079 if (role() == TransferInterface::Role::kSend) 00080 { 00081 // close all open connections (send stop_v0) first 00082 MessHead mh = { 0,MessHead::stop_v0,htons(TransferInterface::source_rank()),{0} }; 00083 if (send_fd_ != -1) 00084 { 00085 // should be blocking with modest timeo 00086 timeval tv = { 0,100000 }; 00087 socklen_t len = sizeof(tv); 00088 setsockopt(send_fd_, SOL_SOCKET, SO_SNDTIMEO, &tv, len); 00089 write(send_fd_, &mh, sizeof(mh)); 00090 } 00091 close(send_fd_); 00092 send_fd_ = -1; 00093 } 00094 else 00095 { 00096 { 00097 std::unique_lock<std::mutex> fd_lock(connected_fd_mutex_); 00098 if (connected_fds_.count(source_rank())) 00099 { 00100 auto it = connected_fds_[source_rank()].begin(); 00101 while (it != connected_fds_[source_rank()].end()) 00102 { 00103 close(*it); 00104 it = connected_fds_[source_rank()].erase(it); 00105 } 00106 connected_fds_.erase(source_rank()); 00107 } 00108 if (ack_listen_thread_ && ack_listen_thread_->joinable()) ack_listen_thread_->join(); 00109 } 00110 00111 std::unique_lock<std::mutex> lk(listen_thread_mutex_); 00112 listen_thread_refcount_--; 00113 if (listen_thread_refcount_ <= 0 && listen_thread_ && listen_thread_->joinable()) 00114 { 00115 listen_thread_->join(); 00116 } 00117 } 00118 00119 TLOG(TLVL_DEBUG) << GetTraceName() << ": End of Destructor"; 00120 } 00121 00122 int artdaq::TCPSocketTransfer::receiveFragmentHeader(detail::RawFragmentHeader& header, size_t timeout_usec) 00123 { 00124 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: BEGIN"; 00125 int ret_rank = RECV_TIMEOUT; 00126 00127 // Don't bomb out until received at least one connection... 00128 if (getConnectedFDCount(source_rank()) == 0) 00129 { // what if just listen_fd??? 00130 // if (receive_socket_has_been_connected_ && TimeUtils::GetElapsedTime(last_recv_time_) > receive_disconnected_wait_s_) 00131 // { 00132 // TLOG(TLVL_ERROR) << GetTraceName() << ": receiveFragmentHeader: senders have been disconnected for " 00133 // << TimeUtils::GetElapsedTime(last_recv_time_) << " s (receive_socket_disconnected_wait_s = " << receive_disconnected_wait_s_ << " s). RETURNING DATA_END!"; 00134 // return DATA_END; 00135 // } 00136 //if (++not_connected_count_ > receive_err_threshold_) { return DATA_END; } 00137 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Receive socket not connected, returning RECV_TIMEOUT"; 00138 usleep(receive_err_wait_us_); 00139 return RECV_TIMEOUT; 00140 } 00141 receive_socket_has_been_connected_ = true; 00142 last_recv_time_ = std::chrono::steady_clock::now(); 00143 00144 TLOG(5) << GetTraceName() << ": receiveFragmentHeader timeout_usec=" << timeout_usec; 00145 //void* buff=alloca(max_fragment_size_words_*8); 00146 size_t byte_cnt = 0; 00147 int sts; 00148 int offset = 0; 00149 SocketState state = SocketState::Metadata; 00150 int target_bytes = sizeof(MessHead); 00151 uint64_t start_time_us = TimeUtils::gettimeofday_us(); 00152 00153 //while (active_receive_fd_ != -1) 00154 //{ 00155 // TLOG(TLVL_TRACE) << GetTraceName() << ": Currently receiving from fd " << active_receive_fd_ << ", waiting!"; 00156 // usleep(1000); 00157 //} 00158 00159 00160 uint8_t* buff; 00161 00162 int timeout_ms; 00163 if (timeout_usec == 0) 00164 timeout_ms = 0; 00165 else 00166 timeout_ms = (timeout_usec + 999) / 1000; // want at least 1 ms 00167 00168 bool done = false; 00169 bool noDataWarningSent = false; 00170 int loop_guard = 0; 00171 00172 while (!done && getConnectedFDCount(source_rank()) > 0) 00173 { 00174 if (active_receive_fd_ == -1) 00175 { 00176 loop_guard = 0; 00177 size_t fd_count = 0; 00178 std::vector<pollfd> pollfds; 00179 { 00180 std::unique_lock<std::mutex> lk(connected_fd_mutex_); 00181 fd_count = connected_fds_[source_rank()].size(); 00182 pollfds.resize(fd_count); 00183 auto iter = connected_fds_[source_rank()].begin(); 00184 for (size_t ii = 0; ii < fd_count; ++ii) 00185 { 00186 pollfds[ii].events = POLLIN | POLLPRI | POLLERR; 00187 pollfds[ii].fd = *iter; 00188 ++iter; 00189 } 00190 } 00191 //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragment: Polling fd to see if there's data" ; 00192 int num_fds_ready = poll(&pollfds[0], fd_count, timeout_ms); 00193 if (num_fds_ready <= 0) 00194 { 00195 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: No data on receive socket, returning RECV_TIMEOUT"; 00196 return RECV_TIMEOUT; 00197 } 00198 00199 size_t index = 0; 00200 if (last_active_receive_fd_ != -1) 00201 { 00202 for (auto& pollfd : pollfds) 00203 { 00204 index++; 00205 if (pollfd.fd == last_active_receive_fd_) 00206 { 00207 break; 00208 } 00209 } 00210 } 00211 00212 int active_index = -1; 00213 short anomolous_events = 0; 00214 for (size_t ii = index; ii < index + pollfds.size(); ++ii) 00215 { 00216 auto pollfd_index = (ii + index) % pollfds.size(); 00217 if (pollfds[pollfd_index].revents & (POLLIN | POLLPRI)) 00218 { 00219 active_index = pollfd_index; 00220 active_receive_fd_ = pollfds[active_index].fd; 00221 active_revents_ = pollfds[active_index].revents;; 00222 break; 00223 } 00224 else if (pollfds[pollfd_index].revents & (POLLHUP | POLLERR)) 00225 { 00226 disconnect_receive_socket_(pollfds[pollfd_index].fd, "Poll returned POLLHUP or POLLERR, indicating problems with the sender."); 00227 continue; 00228 } 00229 else if (pollfds[pollfd_index].revents & (POLLNVAL)) 00230 { 00231 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: FD is closed, most likely because the peer went away. Removing from fd list."; 00232 disconnect_receive_socket_(pollfds[pollfd_index].fd, "FD is closed, most likely because the peer went away."); 00233 continue; 00234 } 00235 else if (pollfds[pollfd_index].revents) 00236 { 00237 anomolous_events |= pollfds[pollfd_index].revents; 00238 } 00239 } 00240 00241 if (active_index == -1) 00242 { 00243 if (anomolous_events) 00244 TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Wrong event received from a pollfd. Mask: " << static_cast<int>(anomolous_events); 00245 active_receive_fd_ = -1; 00246 continue; 00247 } 00248 00249 if (!done && timeout_usec > 0) 00250 { 00251 // calc next timeout_ms (unless timed out) 00252 size_t delta_us = TimeUtils::gettimeofday_us() - start_time_us; 00253 if (delta_us > timeout_usec) 00254 { 00255 return RECV_TIMEOUT; 00256 } 00257 timeout_ms = ((timeout_usec - delta_us) + 999) / 1000; // want at least 1 ms 00258 } 00259 } 00260 if (loop_guard > 10) { usleep(1000); } 00261 if (++loop_guard > 10010) 00262 { 00263 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: loop guard triggered, returning RECV_TIMEOUT"; 00264 usleep(receive_err_wait_us_); 00265 active_receive_fd_ = -1; 00266 return RECV_TIMEOUT; 00267 } 00268 00269 if (state == SocketState::Metadata) 00270 { 00271 //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Reading Message Header" ; 00272 buff = &(mha[offset]); 00273 byte_cnt = sizeof(MessHead) - offset; 00274 } 00275 else 00276 { 00277 //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentHeader: Reading data" ; 00278 buff = reinterpret_cast<uint8_t*>(&header) + offset; 00279 byte_cnt = target_bytes - offset; 00280 } 00281 //if (byte_cnt > sizeof(MessHead)) 00282 // { 00283 // TLOG(TLVL_ERROR) << "Invalid byte count for read (count=" << byte_cnt 00284 // << ",offset=" << offset << ",mh.byte_count=" << mh.byte_count 00285 // << "), skipping read and returning RECV_TIMEOUT"; 00286 // return RECV_TIMEOUT; 00287 //} 00288 00289 if (byte_cnt > 0) 00290 { 00291 TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Reading " << byte_cnt << " bytes from socket"; 00292 sts = read(active_receive_fd_, buff, byte_cnt); 00293 TLOG(6) << GetTraceName() << ": receiveFragmentHeader: Done with read"; 00294 } 00295 if (sts > 0) { 00296 loop_guard = 0; 00297 last_recv_time_ = std::chrono::steady_clock::now(); 00298 } 00299 00300 TLOG(7) << GetTraceName() << ": receiveFragmentHeader state=" << static_cast<int>(state) << " read=" << sts; 00301 if (sts < 0) 00302 { 00303 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: Error on receive, closing socket " << " (errno=" << errno << ": " << strerror(errno) << ")"; 00304 active_receive_fd_ = disconnect_receive_socket_(active_receive_fd_); 00305 } 00306 else if (sts == 0) 00307 { 00308 if (!noDataWarningSent) { 00309 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: No data received, is the sender still sending?!?"; 00310 noDataWarningSent = true; 00311 } 00312 if (TimeUtils::GetElapsedTime(last_recv_time_) > receive_disconnected_wait_s_) 00313 { 00314 TLOG(TLVL_ERROR) << GetTraceName() << ": receiveFragmentHeader: No data received within timeout, aborting!"; 00315 return RECV_TIMEOUT; 00316 } 00317 } 00318 else 00319 { 00320 // see if we're done (with this state) 00321 sts = offset += sts; 00322 if (sts >= target_bytes) 00323 { 00324 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Target read bytes reached. Changing state"; 00325 offset = 0; 00326 if (state == SocketState::Metadata) 00327 { 00328 state = SocketState::Data; 00329 mh.byte_count = ntohl(mh.byte_count); 00330 mh.source_id = ntohs(mh.source_id); 00331 target_bytes = mh.byte_count; 00332 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Expected header size = " << target_bytes << ", sizeof(RawFragmentHeader) = " << sizeof(artdaq::detail::RawFragmentHeader); 00333 //assert(target_bytes == sizeof(artdaq::detail::RawFragmentHeader) || target_bytes == 0); 00334 00335 if (mh.message_type == MessHead::stop_v0) 00336 { 00337 active_receive_fd_ = disconnect_receive_socket_(active_receive_fd_, "Stop Message received."); 00338 } 00339 else if (mh.message_type == MessHead::data_v0 || mh.message_type == MessHead::data_more_v0) 00340 { 00341 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentHeader: Message header indicates that Fragment data follows when I was expecting a Fragment header!"; 00342 active_receive_fd_ = disconnect_receive_socket_(active_receive_fd_, "Desync detected"); 00343 } 00344 00345 if (target_bytes == 0) 00346 { 00347 //Probably a stop_v0, return timeout so we can try again. 00348 return RECV_TIMEOUT; 00349 } 00350 } 00351 else 00352 { 00353 ret_rank = source_rank(); 00354 TLOG(8) << GetTraceName() << ": receiveFragmentHeader done sts=" << sts << " src=" << ret_rank; 00355 TLOG(7) << GetTraceName() << ": receiveFragmentHeader: Done receiving fragment header. Moving into output."; 00356 00357 done = true; // no more polls 00358 //break; // no more read of ready fds 00359 } 00360 } 00361 } 00362 00363 } // while(!done)...poll 00364 00365 TLOG(5) << GetTraceName() << ": receiveFragmentHeader: Returning " << ret_rank; 00366 return ret_rank; 00367 } 00368 00369 int artdaq::TCPSocketTransfer::disconnect_receive_socket_(int fd, std::string msg) 00370 { 00371 TLOG(TLVL_WARNING) << GetTraceName() << ": disconnect_receive_socket_: " << msg << " Closing socket " << fd; 00372 close(fd); 00373 std::unique_lock<std::mutex> lk(connected_fd_mutex_); 00374 if (connected_fds_.count(source_rank())) 00375 connected_fds_[source_rank()].erase(fd); 00376 fd = -1; 00377 TLOG(TLVL_DEBUG) << GetTraceName() << ": disconnect_receive_socket_: There are now " << connected_fds_[source_rank()].size() << " active senders."; 00378 return fd; 00379 } 00380 00381 int artdaq::TCPSocketTransfer::receiveFragmentData(RawDataType* destination, size_t) 00382 { 00383 TLOG(19) << GetTraceName() << ": receiveFragmentData: BEGIN"; 00384 int ret_rank = RECV_TIMEOUT; 00385 if (active_receive_fd_ == -1) 00386 { // what if just listen_fd??? 00387 TLOG(TLVL_ERROR) << GetTraceName() << ": receiveFragmentData: Receive socket not connected, returning RECV_TIMEOUT (Will result in \"Unexpected return code error\")"; 00388 return RECV_TIMEOUT; 00389 } 00390 00391 //void* buff=alloca(max_fragment_size_words_*8); 00392 uint8_t* buff; 00393 size_t byte_cnt = 0; 00394 int sts; 00395 int offset = 0; 00396 SocketState state = SocketState::Metadata; 00397 int target_bytes = sizeof(MessHead); 00398 00399 pollfd pollfd_s; 00400 pollfd_s.events = POLLIN | POLLPRI | POLLERR; 00401 pollfd_s.fd = active_receive_fd_; 00402 00403 int loop_guard = 0; 00404 bool done = false; 00405 bool noDataWarningSent = false; 00406 last_recv_time_ = std::chrono::steady_clock::now(); 00407 while (!done) 00408 { 00409 TLOG(9) << GetTraceName() << ": receiveFragmentData: Polling fd to see if there's data"; 00410 int num_fds_ready = poll(&pollfd_s, 1, 1000); 00411 TLOG(TLVL_TRACE) << GetTraceName() << ": receiveFragmentData: Polled fd to see if there's data" 00412 << ", num_fds_ready = " << num_fds_ready; 00413 if (num_fds_ready <= 0) 00414 { 00415 if (num_fds_ready == 0) 00416 { 00417 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: No data from " << source_rank() << " in " << TimeUtils::GetElapsedTimeMilliseconds(last_recv_time_) << " ms!" 00418 << " State = " << (state == SocketState::Metadata ? "Metadata" : "Data") << ", recvd/total=" << offset << "/" << target_bytes << " (delta=" << target_bytes - offset << ")"; 00419 00420 if (TimeUtils::GetElapsedTime(last_recv_time_) > receive_disconnected_wait_s_) 00421 { 00422 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: No data received within timeout (" << TimeUtils::GetElapsedTime(last_recv_time_) << " / " << receive_disconnected_wait_s_ << " ), returning RECV_TIMEOUT"; 00423 disconnect_receive_socket_(active_receive_fd_, "No data on this socket within timeout"); 00424 active_receive_fd_ = -1; 00425 return RECV_TIMEOUT; 00426 } 00427 continue; 00428 } 00429 00430 TLOG(TLVL_ERROR) << "Error in poll: errno=" << errno; 00431 active_receive_fd_ = -1; 00432 break; 00433 } 00434 else { last_recv_time_ = std::chrono::steady_clock::now(); } 00435 00436 if (pollfd_s.revents & (POLLIN | POLLPRI)) 00437 { 00438 // Expected, don't have to check revents any further 00439 } 00440 else if (pollfd_s.revents & (POLLNVAL)) 00441 { 00442 disconnect_receive_socket_(pollfd_s.fd, "FD is closed, most likely because the peer went away."); 00443 break; 00444 } 00445 else if (pollfd_s.revents & (POLLHUP | POLLERR)) 00446 { 00447 disconnect_receive_socket_(pollfd_s.fd, "Poll returned POLLHUP or POLLERR, indicating problems with the sender."); 00448 break; 00449 } 00450 else 00451 { 00452 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: Wrong event received from pollfd: " << pollfd_s.revents; 00453 disconnect_receive_socket_(pollfd_s.fd); 00454 break; 00455 } 00456 00457 if (state == SocketState::Metadata) 00458 { 00459 //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading Message Header" ; 00460 buff = &(mha[offset]); 00461 byte_cnt = sizeof(MessHead) - offset; 00462 } 00463 else 00464 { 00465 //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Reading data" ; 00466 buff = reinterpret_cast<uint8_t*>(destination) + offset; 00467 byte_cnt = mh.byte_count - offset; 00468 } 00469 00470 TLOG(10) << GetTraceName() << ": receiveFragmentData: Reading " << byte_cnt << " bytes from socket into " << (void*)buff; 00471 sts = read(active_receive_fd_, buff, byte_cnt); 00472 //TLOG(TLVL_DEBUG) << GetTraceName() << ": receiveFragmentData: Done with read" ; 00473 00474 TLOG(10) << GetTraceName() << ": recvFragment state=" << static_cast<int>(state) << " read=" << sts; 00475 00476 if (sts == 0) 00477 { 00478 if (loop_guard > 10) { usleep(1000); } 00479 if (++loop_guard > 10010) 00480 { 00481 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: loop guard triggered, returning RECV_TIMEOUT"; 00482 active_receive_fd_ = -1; 00483 return RECV_TIMEOUT; 00484 } 00485 } 00486 else if(sts > 0) 00487 { 00488 loop_guard = 0; 00489 last_recv_time_ = std::chrono::steady_clock::now(); 00490 } 00491 00492 if (sts < 0) 00493 { 00494 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: Error on receive, closing socket" 00495 << " (errno=" << errno << ": " << strerror(errno) << ")"; 00496 disconnect_receive_socket_(pollfd_s.fd); 00497 } 00498 else if (sts == 0) 00499 { 00500 if (!noDataWarningSent) { 00501 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: No data received, is the sender still sending?!?"; 00502 noDataWarningSent = true; 00503 } 00504 if (TimeUtils::GetElapsedTime(last_recv_time_) > receive_disconnected_wait_s_) 00505 { 00506 TLOG(TLVL_ERROR) << GetTraceName() << ": receiveFragmentData: No data received within timeout, aborting!"; 00507 return RECV_TIMEOUT; 00508 } 00509 } 00510 else 00511 { 00512 // see if we're done (with this state) 00513 sts = offset += sts; 00514 if (sts >= target_bytes) 00515 { 00516 TLOG(9) << GetTraceName() << ": receiveFragmentData: Target read bytes reached. Changing state"; 00517 offset = 0; 00518 if (state == SocketState::Metadata) 00519 { 00520 state = SocketState::Data; 00521 mh.byte_count = ntohl(mh.byte_count); 00522 mh.source_id = ntohs(mh.source_id); 00523 target_bytes = mh.byte_count; 00524 00525 00526 if (mh.message_type == MessHead::header_v0) 00527 { 00528 TLOG(TLVL_WARNING) << GetTraceName() << ": receiveFragmentData: Message header indicates that a Fragment header follows when I was expecting Fragment data!"; 00529 active_receive_fd_ = disconnect_receive_socket_(active_receive_fd_, "Desync detected"); 00530 } 00531 } 00532 else 00533 { 00534 ret_rank = source_rank(); 00535 TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank; 00536 TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output."; 00537 00538 #if USE_ACKS 00539 send_ack_(active_receive_fd_); 00540 #endif 00541 00542 done = true; // no more polls 00543 //break; // no more read of ready fds 00544 } 00545 } 00546 } 00547 00548 // Check if we were asked to do a 0-size receive 00549 if (target_bytes == 0 && state == SocketState::Data) 00550 { 00551 ret_rank = source_rank(); 00552 TLOG(11) << GetTraceName() << ": receiveFragmentData done sts=" << sts << " src=" << ret_rank; 00553 TLOG(9) << GetTraceName() << ": receiveFragmentData: Done receiving fragment. Moving into output."; 00554 00555 #if USE_ACKS 00556 send_ack_(active_receive_fd_); 00557 #endif 00558 00559 done = true; // no more polls 00560 } 00561 00562 } // while(!done)...poll 00563 00564 last_active_receive_fd_ = active_receive_fd_; 00565 active_receive_fd_ = -1; 00566 00567 TLOG(9) << GetTraceName() << ": receiveFragmentData: Returning rank " << ret_rank; 00568 return ret_rank; 00569 } 00570 00571 bool artdaq::TCPSocketTransfer::isRunning() 00572 { 00573 switch (role()) 00574 { 00575 case TransferInterface::Role::kSend: 00576 return send_fd_ != -1; 00577 case TransferInterface::Role::kReceive: 00578 TLOG(TLVL_DEBUG) << GetTraceName() << ": isRunning: There are " << getConnectedFDCount(source_rank()) << " fds connected."; 00579 return getConnectedFDCount(source_rank()) > 0; 00580 } 00581 return false; 00582 } 00583 00584 void artdaq::TCPSocketTransfer::flush_buffers() 00585 { 00586 while(connected_fds_[source_rank()].size()) { 00587 disconnect_receive_socket_(*connected_fds_[source_rank()].begin(), "Flushing connections"); 00588 } 00589 } 00590 00591 // Send the given Fragment. Return the rank of the destination to which 00592 // the Fragment was sent OR -1 if to none. 00593 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendFragment_(Fragment&& frag, size_t send_timeout_usec) 00594 { 00595 TLOG(12) << GetTraceName() << ": sendFragment begin send of fragment with sequenceID="<<frag.sequenceID(); 00596 artdaq::Fragment grab_ownership_frag = std::move(frag); 00597 00598 reconnect_(); 00599 // Send Fragment Header 00600 00601 #if USE_ACKS 00602 // Wait for fragments to be received 00603 while (static_cast<size_t>(send_ack_diff_) > buffer_count_) usleep(10000); 00604 #endif 00605 00606 iovec iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress()), 00607 detail::RawFragmentHeader::num_words() * sizeof(RawDataType) }; 00608 00609 auto sts = sendData_(&iov, 1, send_retry_timeout_us_, true); 00610 auto start_time = std::chrono::steady_clock::now(); 00611 //If it takes more than 10 seconds to write a Fragment header, give up 00612 while (sts == CopyStatus::kTimeout && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000) 00613 { 00614 TLOG(13) << GetTraceName() << ": sendFragment: Timeout sending fragment"; 00615 sts = sendData_(&iov, 1, send_retry_timeout_us_, true); 00616 usleep(1000); 00617 } 00618 if (sts != CopyStatus::kSuccess) return sts; 00619 00620 // Send Fragment Data 00621 00622 iov = { reinterpret_cast<void*>(grab_ownership_frag.headerAddress() + detail::RawFragmentHeader::num_words()), 00623 grab_ownership_frag.sizeBytes() - detail::RawFragmentHeader::num_words() * sizeof(RawDataType) 00624 }; 00625 sts = sendData_(&iov, 1, send_retry_timeout_us_); 00626 start_time = std::chrono::steady_clock::now(); 00627 while (sts == CopyStatus::kTimeout && (send_timeout_usec == 0 || TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_timeout_usec) && TimeUtils::GetElapsedTimeMicroseconds(start_time) < 10000000) 00628 { 00629 TLOG(13) << GetTraceName() << ": sendFragment: Timeout sending fragment"; 00630 sts = sendData_(&iov, 1, send_retry_timeout_us_); 00631 usleep(1000); 00632 } 00633 00634 #if USE_ACKS 00635 send_ack_diff_++; 00636 #endif 00637 00638 TLOG(12) << GetTraceName() << ": sendFragment returning " << CopyStatusToString(sts); 00639 return sts; 00640 } 00641 00642 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const void* buf, size_t bytes, size_t send_timeout_usec, bool isHeader) 00643 { 00644 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_ Converting buf to iovec"; 00645 iovec iov = { (void*)buf, bytes }; 00646 return sendData_(&iov, 1, send_timeout_usec, isHeader); 00647 } 00648 00649 artdaq::TransferInterface::CopyStatus artdaq::TCPSocketTransfer::sendData_(const struct iovec* iov, int iovcnt, size_t send_timeout_usec, bool isHeader) 00650 { 00651 // check all connected??? -- currently just check fd!=-1 00652 if (send_fd_ == -1) 00653 { 00654 if (timeoutMessageArmed_) 00655 { 00656 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Send fd is not open. Returning kTimeout"; 00657 timeoutMessageArmed_ = false; 00658 } 00659 return CopyStatus::kTimeout; 00660 } 00661 timeoutMessageArmed_ = true; 00662 TLOG(14) << GetTraceName() << ": send_timeout_usec is " << send_timeout_usec << ", currently unused."; 00663 00664 //TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Determining write size" ; 00665 uint32_t total_to_write_bytes = 0; 00666 std::vector<iovec> iov_in(iovcnt + 1); // need contiguous (for the unlike case that only partial MH 00667 std::vector<iovec> iovv(iovcnt + 2); // 1 more for mh and another one for any partial 00668 int ii; 00669 for (ii = 0; ii < iovcnt; ++ii) 00670 { 00671 iov_in[ii + 1] = iov[ii]; 00672 total_to_write_bytes += iov[ii].iov_len; 00673 } 00674 //TLOG(TLVL_DEBUG) << GetTraceName() << ": sendData_: Constructing Message Header" ; 00675 MessHead mh = { 0,isHeader ? MessHead::header_v0 : MessHead::data_v0,htons(source_rank()),{htonl(total_to_write_bytes)} }; 00676 iov_in[0].iov_base = &mh; 00677 iov_in[0].iov_len = sizeof(mh); 00678 total_to_write_bytes += sizeof(mh); 00679 00680 ssize_t sts = 0; 00681 ssize_t total_written_bytes = 0; 00682 ssize_t per_write_max_bytes = (32 * 1024); 00683 00684 size_t in_iov_idx = 0; // only increment this when we know the associated data has been xferred 00685 size_t out_iov_idx = 0; 00686 ssize_t this_write_bytes = 0; 00687 00688 do 00689 { 00690 // The first out_iov may be set at the end of the previous loop. 00691 // iov looping from below (b/c of the latter, we need to check this_write_bytes) 00692 for (; 00693 (in_iov_idx + out_iov_idx) < iov_in.size() && this_write_bytes < per_write_max_bytes; 00694 ++out_iov_idx) 00695 { 00696 this_write_bytes += iov_in[in_iov_idx + out_iov_idx].iov_len; 00697 iovv[out_iov_idx] = iov_in[in_iov_idx + out_iov_idx]; 00698 } 00699 if (this_write_bytes > per_write_max_bytes) 00700 { 00701 iovv[out_iov_idx - 1].iov_len -= this_write_bytes - per_write_max_bytes; 00702 this_write_bytes = per_write_max_bytes; 00703 } 00704 00705 // need to do blocking algorithm -- including throttled block notifications 00706 do_again: 00707 #ifndef __OPTIMIZE__ // This can be an expensive TRACE call (even if disabled) due to multiplicity of calls 00708 TLOG(14) << GetTraceName() << ": sendFragment b4 writev " << std::setw(7) << total_written_bytes << " total_written_bytes send_fd_=" << send_fd_ << " in_idx=" << in_iov_idx 00709 << " iovcnt=" << out_iov_idx << " 1st.len=" << iovv[0].iov_len; 00710 #endif 00711 //TLOG(TLVL_DEBUG) << GetTraceName() << " calling writev" ; 00712 sts = writev(send_fd_, &(iovv[0]), out_iov_idx); 00713 //TLOG(TLVL_DEBUG) << GetTraceName() << " done with writev" ; 00714 00715 if (sts == -1) 00716 { 00717 if (errno == EAGAIN /* same as EWOULDBLOCK */) 00718 { 00719 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment EWOULDBLOCK"; 00720 fcntl(send_fd_, F_SETFL, 0); // clear O_NONBLOCK 00721 blocking = true; 00722 // NOTE: YES -- could drop here 00723 goto do_again; 00724 } 00725 TLOG(TLVL_WARNING) << GetTraceName() << ": sendFragment_: WRITE ERROR: " << strerror(errno); 00726 connect_state = 0; // any write error closes 00727 close(send_fd_); 00728 send_fd_ = -1; 00729 return TransferInterface::CopyStatus::kErrorNotRequiringException; 00730 } 00731 else if (sts != this_write_bytes) 00732 { 00733 // we'll loop around -- with 00734 TLOG(TLVL_DEBUG) << GetTraceName() << ": sendFragment writev sts(" << sts << ")!=requested_send_bytes(" << this_write_bytes << ")"; 00735 total_written_bytes += sts; // add sts to total_written_bytes now as sts is adjusted next 00736 // find which iovs are done 00737 for (ii = 0; (size_t)sts >= iovv[ii].iov_len; ++ii) 00738 sts -= iovv[ii].iov_len; 00739 in_iov_idx += ii; // done with these in_iovs 00740 iovv[ii].iov_len -= sts; // adjust partial iov 00741 iovv[ii].iov_base = (uint8_t*)(iovv[ii].iov_base) + sts; // adjust partial iov 00742 00743 // add more to get up to per_write_max_bytes 00744 out_iov_idx = 0; 00745 if (ii != 0) 00746 iovv[out_iov_idx] = iovv[ii]; 00747 // starting over 00748 this_write_bytes = iovv[out_iov_idx].iov_len; 00749 // add any left over from appropriate in_iov_idx -- 00750 // i.e. match this out_iov with the in_iov that was used to 00751 // initialize it; see how close the out base+len is to in base+len 00752 // check !>per_write_max_bytes 00753 unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len) 00754 - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len); 00755 if (additional) 00756 { 00757 iovv[out_iov_idx].iov_len += additional; 00758 this_write_bytes += additional; 00759 if (this_write_bytes > per_write_max_bytes) 00760 { 00761 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes; 00762 this_write_bytes = per_write_max_bytes; 00763 } 00764 } 00765 ++out_iov_idx; // done with 00766 TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts!=: this_write_bytes=" << this_write_bytes 00767 << " out_iov_idx=" << out_iov_idx 00768 << " additional=" << additional 00769 << " ii=" << ii; 00770 } 00771 else 00772 { 00773 #ifndef __OPTIMIZE__ // This can be an expensive TRACE call (even if disabled) due to multiplicity of calls 00774 TLOG(TLVL_TRACE) << GetTraceName() << ": sendFragment writev sts(" << sts << ")==requested_send_bytes(" << this_write_bytes << ")"; 00775 #endif 00776 total_written_bytes += sts; 00777 --out_iov_idx; // make it the index of the last iovv 00778 iovv[out_iov_idx].iov_base = (uint8_t*)(iovv[out_iov_idx].iov_base) + iovv[out_iov_idx].iov_len; 00779 iovv[out_iov_idx].iov_len = 0; 00780 in_iov_idx += out_iov_idx; // at least this many complete (one more if "last iovv" is complete 00781 this_write_bytes = 0; 00782 // need to check last iovv against appropriate iov_in 00783 unsigned long additional = ((unsigned long)iov_in[in_iov_idx].iov_base + iov_in[in_iov_idx].iov_len) 00784 - ((unsigned long)iovv[out_iov_idx].iov_base + iovv[out_iov_idx].iov_len); 00785 if (additional) 00786 { 00787 iovv[out_iov_idx].iov_len += additional; 00788 this_write_bytes += additional; 00789 if (this_write_bytes > per_write_max_bytes) 00790 { 00791 iovv[out_iov_idx].iov_len -= this_write_bytes - per_write_max_bytes; 00792 this_write_bytes = per_write_max_bytes; 00793 } 00794 if (out_iov_idx != 0) 00795 iovv[0] = iovv[out_iov_idx]; 00796 out_iov_idx = 1; 00797 } 00798 else 00799 { 00800 ++in_iov_idx; 00801 out_iov_idx = 0; 00802 } 00803 } 00804 } while (total_written_bytes < total_to_write_bytes); 00805 if (total_written_bytes > total_to_write_bytes) 00806 TLOG(TLVL_ERROR) << GetTraceName() << ": sendFragment program error: too many bytes transferred"; 00807 00808 if (blocking) 00809 { 00810 blocking = false; 00811 fcntl(send_fd_, F_SETFL, O_NONBLOCK); // set O_NONBLOCK 00812 } 00813 sts = total_written_bytes - sizeof(MessHead); 00814 00815 TLOG(14) << GetTraceName() << ": sendFragment sts=" << sts; 00816 return TransferInterface::CopyStatus::kSuccess; 00817 } 00818 00819 void artdaq::TCPSocketTransfer::connect_() 00820 { 00821 auto start_time = std::chrono::steady_clock::now(); 00822 00823 // Retry a few times if we can't connect 00824 while (send_fd_ == -1 && TimeUtils::GetElapsedTimeMicroseconds(start_time) < send_retry_timeout_us_ * 10) 00825 { 00826 TLOG(TLVL_DEBUG) << GetTraceName() << ": Connecting sender socket"; 00827 int sndbuf_bytes = static_cast<int>(sndbuf_); 00828 if (sndbuf_ > INT_MAX) 00829 { 00830 sndbuf_bytes = INT_MAX; 00831 TLOG(TLVL_WARNING) << "Requested SNDBUF " << sndbuf_ << " too large, setting to INT_MAX: " << INT_MAX; 00832 } 00833 TLOG(TLVL_DEBUG) << "Requested SNDBUF is " << sndbuf_bytes; 00834 00835 send_fd_ = TCPConnect(hostMap_[destination_rank()].c_str() 00836 , portMan->GetTCPSocketTransferPort(destination_rank()) 00837 , O_NONBLOCK 00838 , sndbuf_bytes); 00839 if (send_fd_ == -1) 00840 usleep(send_retry_timeout_us_); 00841 } 00842 connect_state = 0; 00843 blocking = 0; 00844 TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_ " + hostMap_[destination_rank()] + ":" << portMan->GetTCPSocketTransferPort(destination_rank()) << " send_fd_=" << send_fd_; 00845 if (send_fd_ != -1) 00846 { 00847 // write connect msg 00848 TLOG(TLVL_DEBUG) << GetTraceName() << ": connect_: Writing connect message"; 00849 MessHead mh = { 0,MessHead::connect_v0,htons(source_rank()),{htonl(CONN_MAGIC)} }; 00850 ssize_t sts = write(send_fd_, &mh, sizeof(mh)); 00851 if (sts == -1) 00852 { 00853 TLOG(TLVL_ERROR) << GetTraceName() << ": connect_: Error writing connect message!"; 00854 // a write error here is completely unexpected! 00855 connect_state = 0; 00856 close(send_fd_); 00857 send_fd_ = -1; 00858 } 00859 else 00860 { 00861 TLOG(TLVL_INFO) << GetTraceName() << ": connect_: Successfully connected"; 00862 // consider it all connected/established 00863 connect_state = 1; 00864 } 00865 00866 #if USE_ACKS 00867 if (ack_listen_thread_ && ack_listen_thread_->joinable()) ack_listen_thread_->join(); 00868 TLOG(TLVL_INFO) << GetTraceName() << ": Starting Ack Listener Thread"; 00869 00870 try { 00871 ack_listen_thread_ = std::make_unique<boost::thread>(&TCPSocketTransfer::receive_acks_, this); 00872 } 00873 catch (const boost::exception& e) 00874 { 00875 TLOG(TLVL_ERROR) << "Caught boost::exception starting TCP Socket Ack Listen thread: " << boost::diagnostic_information(e) << ", errno=" << errno; 00876 std::cerr << "Caught boost::exception starting TCP Socket Ack Listen thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl; 00877 exit(5); 00878 } 00879 #endif 00880 } 00881 } 00882 00883 void artdaq::TCPSocketTransfer::reconnect_() 00884 { 00885 if (send_fd_ == -1 && role() == TransferInterface::Role::kSend) 00886 { 00887 TLOG(TLVL_TRACE) << GetTraceName() << ": check/reconnect"; 00888 return connect_(); 00889 } 00890 } 00891 00892 void artdaq::TCPSocketTransfer::start_listen_thread_() 00893 { 00894 std::unique_lock<std::mutex> start_lock(listen_thread_mutex_); 00895 if (listen_thread_refcount_ == 0) 00896 { 00897 if (listen_thread_ && listen_thread_->joinable()) listen_thread_->join(); 00898 listen_thread_refcount_ = 1; 00899 TLOG(TLVL_INFO) << GetTraceName() << ": Starting Listener Thread"; 00900 00901 try { 00902 listen_thread_ = std::make_unique<boost::thread>(&TCPSocketTransfer::listen_, portMan->GetTCPSocketTransferPort(destination_rank()), rcvbuf_); 00903 } 00904 catch (const boost::exception& e) 00905 { 00906 TLOG(TLVL_ERROR) << "Caught boost::exception starting TCP Socket Listen thread: " << boost::diagnostic_information(e) << ", errno=" << errno; 00907 std::cerr << "Caught boost::exception starting TCP Socket Listen thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl; 00908 exit(5); 00909 } 00910 } 00911 else 00912 { 00913 listen_thread_refcount_++; 00914 } 00915 } 00916 00917 #if USE_ACKS 00918 void artdaq::TCPSocketTransfer::receive_acks_() 00919 { 00920 while (send_fd_ >= 0) 00921 { 00922 pollfd pollfd_s; 00923 pollfd_s.events = POLLIN | POLLPRI; 00924 pollfd_s.fd = send_fd_; 00925 00926 TLOG(18) << GetTraceName() << ": receive_acks_: Polling fd to see if there's data"; 00927 int num_fds_ready = poll(&pollfd_s, 1, 1000); 00928 if (num_fds_ready <= 0) 00929 { 00930 if (num_fds_ready == 0) 00931 { 00932 TLOG(18) << GetTraceName() << ": receive_acks_: No data on receive socket"; 00933 continue; 00934 } 00935 00936 TLOG(TLVL_ERROR) << "Error in poll: errno=" << errno; 00937 break; 00938 } 00939 00940 if (pollfd_s.revents & (POLLIN | POLLPRI)) 00941 { 00942 // Expected, don't have to check revents any further 00943 } 00944 else 00945 { 00946 TLOG(TLVL_DEBUG) << GetTraceName() << ": receive_acks_: Wrong event received from pollfd: " << pollfd_s.revents; 00947 break; 00948 } 00949 00950 MessHead mh; 00951 auto sts = read(send_fd_, &mh, sizeof(mh)); 00952 00953 if (sts != sizeof(mh)) 00954 { 00955 TLOG(TLVL_ERROR) << GetTraceName() << ": receive_ack_: Wrong message header length received! (actual " << sts << " != " << sizeof(mh) << " expected)"; 00956 continue; 00957 } 00958 00959 // check for "magic" and valid source_id(aka rank) 00960 mh.source_id = ntohs(mh.source_id); // convert here as it is reference several times 00961 if (mh.source_id != my_rank) 00962 { 00963 TLOG(TLVL_ERROR) << GetTraceName() << ": receive_ack_: Received ack for different sender! Rank=" << my_rank << ", hdr=" << mh.source_id; 00964 continue; 00965 } 00966 if (ntohl(mh.conn_magic) != ACK_MAGIC || !(mh.message_type == MessHead::ack_v0)) // Allow for future connect message versions 00967 { 00968 TLOG(TLVL_ERROR) << GetTraceName() << ": receive_ack_: Wrong magic bytes in header!"; 00969 continue; 00970 } 00971 00972 TLOG(17) << GetTraceName() << ": receive_acks_: Received ack message, diff is now " << (send_ack_diff_.load() - 1); 00973 send_ack_diff_--; 00974 } 00975 } 00976 00977 void artdaq::TCPSocketTransfer::send_ack_(int fd) 00978 { 00979 MessHead mh = { 0,MessHead::ack_v0,htons(source_rank()),{ htonl(ACK_MAGIC) } }; 00980 write(fd, &mh, sizeof(mh)); 00981 } 00982 #endif 00983 00984 void artdaq::TCPSocketTransfer::listen_(int port, size_t rcvbuf) 00985 { 00986 int listen_fd = -1; 00987 while (listen_thread_refcount_ > 0) 00988 { 00989 TLOG(TLVL_TRACE) << "listen_: Listening/accepting new connections on port " << port; 00990 if (listen_fd == -1) 00991 { 00992 TLOG(TLVL_DEBUG) << "listen_: Opening listener"; 00993 listen_fd = TCP_listen_fd(port, rcvbuf); 00994 } 00995 if (listen_fd == -1) 00996 { 00997 TLOG(TLVL_DEBUG) << "listen_: Error creating listen_fd!"; 00998 break; 00999 } 01000 01001 int res; 01002 timeval tv = { 2,0 }; // maybe increase of some global "debugging" flag set??? 01003 fd_set rfds; 01004 FD_ZERO(&rfds); 01005 FD_SET(listen_fd, &rfds); 01006 01007 res = select(listen_fd + 1, &rfds, (fd_set *)0, (fd_set *)0, &tv); 01008 if (res > 0) 01009 { 01010 int sts; 01011 sockaddr_un un; 01012 socklen_t arglen = sizeof(un); 01013 int fd; 01014 TLOG(TLVL_DEBUG) << "listen_: Calling accept"; 01015 fd = accept(listen_fd, (sockaddr *)&un, &arglen); 01016 TLOG(TLVL_DEBUG) << "listen_: Done with accept"; 01017 01018 TLOG(TLVL_DEBUG) << "listen_: Reading connect message"; 01019 socklen_t lenlen = sizeof(tv); 01020 /*sts=*/ 01021 setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, lenlen); // see man 7 socket. 01022 MessHead mh; 01023 uint64_t mark_us = TimeUtils::gettimeofday_us(); 01024 sts = read(fd, &mh, sizeof(mh)); 01025 uint64_t delta_us = TimeUtils::gettimeofday_us() - mark_us; 01026 TLOG(TLVL_DEBUG) << "listen_: Read of connect message took " << delta_us << " microseconds."; 01027 if (sts != sizeof(mh)) 01028 { 01029 TLOG(TLVL_DEBUG) << "listen_: Wrong message header length received!"; 01030 close(fd); 01031 continue; 01032 } 01033 01034 // check for "magic" and valid source_id(aka rank) 01035 mh.source_id = ntohs(mh.source_id); // convert here as it is reference several times 01036 if (ntohl(mh.conn_magic) != CONN_MAGIC || !(mh.message_type == MessHead::connect_v0)) // Allow for future connect message versions 01037 { 01038 TLOG(TLVL_DEBUG) << "listen_: Wrong magic bytes in header!"; 01039 close(fd); 01040 continue; 01041 } 01042 01043 // now add (new) connection 01044 std::unique_lock<std::mutex> lk(connected_fd_mutex_); 01045 connected_fds_[mh.source_id].insert(fd); 01046 01047 TLOG(TLVL_INFO) << "listen_: New fd is " << fd << " for source rank " << mh.source_id; 01048 } 01049 else 01050 { 01051 TLOG(16) << "listen_: No connections in timeout interval!"; 01052 } 01053 } 01054 01055 TLOG(TLVL_INFO) << "listen_: Shutting down connection listener"; 01056 if (listen_fd != -1) close(listen_fd); 01057 std::unique_lock<std::mutex> lk(connected_fd_mutex_); 01058 auto it = connected_fds_.begin(); 01059 while (it != connected_fds_.end()) 01060 { 01061 auto& fd_set = it->second; 01062 auto rank_it = fd_set.begin(); 01063 while (rank_it != fd_set.end()) 01064 { 01065 close(*rank_it); 01066 rank_it = fd_set.erase(rank_it); 01067 } 01068 it = connected_fds_.erase(it); 01069 } 01070 01071 } // do_connect_ 01072 01073 DEFINE_ARTDAQ_TRANSFER(artdaq::TCPSocketTransfer)