00001 #define TRACE_NAME "RequestReceiver"
00002
00003 #include "artdaq/DAQrate/RequestReceiver.hh"
00004 #include "artdaq/DAQdata/Globals.hh"
00005 #include "artdaq/DAQrate/detail/RequestMessage.hh"
00006
00007 #include <boost/exception/all.hpp>
00008 #include <boost/throw_exception.hpp>
00009
00010 #include <limits>
00011 #include <iterator>
00012
00013 #include "canvas/Utilities/Exception.h"
00014 #include "cetlib_except/exception.h"
00015 #include "fhiclcpp/ParameterSet.h"
00016
00017 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00018 #include "artdaq-core/Data/Fragment.hh"
00019 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00020 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00021 #include "artdaq-core/Utilities/TimeUtils.hh"
00022
00023 #include <fstream>
00024 #include <iomanip>
00025 #include <iterator>
00026 #include <iostream>
00027 #include <iomanip>
00028 #include <algorithm>
00029 #include <sys/poll.h>
00030 #include "artdaq/DAQdata/TCPConnect.hh"
00031
00032 artdaq::RequestReceiver::RequestReceiver()
00033 : request_port_(3001)
00034 , request_addr_("227.128.12.26")
00035 , running_(false)
00036 , requests_()
00037 , request_timing_()
00038 , request_stop_requested_(false)
00039 , request_received_(false)
00040 , end_of_run_timeout_ms_(1000)
00041 , should_stop_(false)
00042 , highest_seen_request_(0)
00043 {}
00044
00045 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps)
00046 : request_port_(ps.get<int>("request_port", 3001))
00047 , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
00048 , multicast_out_addr_(ps.get<std::string>("multicast_interface_ip", "0.0.0.0"))
00049 , running_(false)
00050 , requests_()
00051 , request_timing_()
00052 , request_stop_requested_(false)
00053 , request_received_(false)
00054 , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
00055 , should_stop_(false)
00056 , highest_seen_request_(0)
00057 {
00058 setupRequestListener();
00059 }
00060
00061 void artdaq::RequestReceiver::setupRequestListener()
00062 {
00063 TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_ ;
00064 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
00065 if (request_socket_ < 0)
00066 {
00067 TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno) ;
00068 exit(1);
00069 }
00070
00071 struct sockaddr_in si_me_request;
00072
00073 int yes = 1;
00074 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
00075 {
00076 TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno) ;
00077 exit(1);
00078 }
00079 memset(&si_me_request, 0, sizeof(si_me_request));
00080 si_me_request.sin_family = AF_INET;
00081 si_me_request.sin_port = htons(request_port_);
00082 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
00083 if (bind(request_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
00084 {
00085 TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno) ;
00086 exit(1);
00087 }
00088
00089 if (request_addr_ != "localhost")
00090 {
00091 struct ip_mreq mreq;
00092 int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
00093 if (sts == -1)
00094 {
00095 TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno) ;
00096 exit(1);
00097 }
00098 sts = GetInterfaceForNetwork(multicast_out_addr_.c_str(), mreq.imr_interface);
00099 if (sts == -1)
00100 {
00101 TLOG(TLVL_ERROR) << "Unable to resolve hostname for " << multicast_out_addr_;
00102 exit(1);
00103 }
00104 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
00105 {
00106 TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno) ;
00107 exit(1);
00108 }
00109 }
00110 TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank ;
00111 }
00112
00113 artdaq::RequestReceiver::~RequestReceiver()
00114 {
00115 stopRequestReceiverThread();
00116 }
00117
00118 void artdaq::RequestReceiver::stopRequestReceiverThread()
00119 {
00120 std::unique_lock<std::mutex> lk(state_mutex_);
00121 if (!request_received_)
00122 {
00123 TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
00124 << "Check that UDP port " << request_port_ << " is open in the firewall config.";
00125 }
00126 should_stop_ = true;
00127 TLOG(TLVL_DEBUG) << "Joining requestThread";
00128 if (requestThread_.joinable()) requestThread_.join();
00129 while (running_) usleep(10000);
00130
00131 if (request_socket_ != -1) {
00132 close(request_socket_);
00133 request_socket_ = -1;
00134 }
00135 request_received_ = false;
00136 highest_seen_request_ = 0;
00137 }
00138
00139 void artdaq::RequestReceiver::startRequestReceiverThread()
00140 {
00141 std::unique_lock<std::mutex> lk(state_mutex_);
00142 if (requestThread_.joinable()) requestThread_.join();
00143 should_stop_ = false;
00144 request_stop_requested_ = false;
00145
00146 if (request_socket_ == -1) {
00147 TLOG(TLVL_INFO) << "Connecting Request Reception socket";
00148 setupRequestListener();
00149 }
00150
00151 TLOG(TLVL_INFO) << "Starting Request Reception Thread" ;
00152 requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
00153 running_ = true;
00154 }
00155
00156 void artdaq::RequestReceiver::receiveRequestsLoop()
00157 {
00158 while (!should_stop_)
00159 {
00160 TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests" ;
00161
00162 if (request_socket_ == -1)
00163 {
00164 setupRequestListener();
00165 }
00166
00167 int ms_to_wait = 10;
00168 struct pollfd ufds[1];
00169 ufds[0].fd = request_socket_;
00170 ufds[0].events = POLLIN | POLLPRI | POLLERR;
00171 int rv = poll(ufds, 1, ms_to_wait);
00172
00173
00174 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
00175 {
00176 if (rv == 1 && (ufds[0].revents == POLLNVAL || ufds[0].revents == POLLERR))
00177 {
00178 close(request_socket_);
00179 request_socket_ = -1;
00180 }
00181 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
00182 {
00183 break;
00184 }
00185 continue;
00186 }
00187
00188 TLOG(11) << "Recieved packet on Request channel" ;
00189 artdaq::detail::RequestHeader hdr_buffer;
00190 auto sts = recv(request_socket_, &hdr_buffer, sizeof(hdr_buffer), 0);
00191 if (sts < 0)
00192 {
00193 TLOG(TLVL_ERROR) << "Error receiving request message header err=" << strerror(errno);
00194 close(request_socket_);
00195 request_socket_ = -1;
00196 continue;
00197 }
00198 TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer.header ;
00199 if (!hdr_buffer.isValid()) continue;
00200
00201 request_received_ = true;
00202 if (hdr_buffer.mode == artdaq::detail::RequestMessageMode::EndOfRun)
00203 {
00204 TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests..." ;
00205 request_stop_timeout_ = std::chrono::steady_clock::now();
00206 request_stop_requested_ = true;
00207 }
00208
00209 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer.packet_count);
00210 size_t recvd = 0;
00211 while (recvd < sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count)
00212 {
00213 ssize_t this_recv = recv(request_socket_, reinterpret_cast<uint8_t*>(&pkt_buffer[0]) + recvd, sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count - recvd, 0);
00214 if (this_recv < 0)
00215 {
00216 TLOG(TLVL_ERROR) << "Error receiving request message data err=" << strerror(errno);
00217 close(request_socket_);
00218 request_socket_ = -1;
00219 continue;
00220
00221 }
00222 recvd += this_recv;
00223 }
00224 bool anyNew = false;
00225
00226 if (should_stop_) break;
00227
00228 for (auto& buffer : pkt_buffer)
00229 {
00230 if (!buffer.isValid()) continue;
00231 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
00232 {
00233 TLOG(TLVL_ERROR) << "Received conflicting request for SeqID "
00234 << std::to_string(buffer.sequence_id) << "!"
00235 << " Old ts=" << std::to_string(requests_[buffer.sequence_id])
00236 << ", new ts=" << std::to_string(buffer.timestamp) << ". Keeping OLD!" ;
00237 }
00238 else if (!requests_.count(buffer.sequence_id))
00239 {
00240 int delta = buffer.sequence_id - highest_seen_request_;
00241 TLOG(11) << "Recieved request for sequence ID " << std::to_string(buffer.sequence_id)
00242 << " and timestamp " << std::to_string(buffer.timestamp) << " (delta: " << delta << ")" ;
00243 if (delta < 0)
00244 {
00245 TLOG(11) << "Already serviced this request! Ignoring..." ;
00246 }
00247 else
00248 {
00249 std::unique_lock<std::mutex> tlk(request_mutex_);
00250 requests_[buffer.sequence_id] = buffer.timestamp;
00251 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
00252 anyNew = true;
00253 }
00254 }
00255 }
00256 if (anyNew) {
00257 request_cv_.notify_all();
00258 }
00259 }
00260 TLOG(TLVL_DEBUG) << "Ending Request Thread";
00261 running_ = false;
00262 }
00263
00264 void artdaq::RequestReceiver::RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
00265 {
00266 std::unique_lock<std::mutex> lk(request_mutex_);
00267 requests_.erase(reqID);
00268 if (reqID > highest_seen_request_ && !should_stop_)
00269 {
00270 TLOG(18) << "Setting highest_seen_request_ to " << reqID;
00271 highest_seen_request_ = reqID;
00272 }
00273
00274 if (metricMan)
00275 {
00276 metricMan->sendMetric("Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]), "seconds", 2, MetricMode::Average);
00277 }
00278 request_timing_.erase(reqID);
00279 }