00001 #define TRACE_NAME "RequestReceiver"
00002
00003 #include "artdaq/DAQrate/RequestReceiver.hh"
00004 #include "artdaq/DAQdata/Globals.hh"
00005 #include "artdaq/DAQrate/detail/RequestMessage.hh"
00006
00007 #include <boost/exception/all.hpp>
00008 #include <boost/throw_exception.hpp>
00009
00010 #include <limits>
00011 #include <iterator>
00012
00013 #include "canvas/Utilities/Exception.h"
00014 #include "cetlib_except/exception.h"
00015 #include "fhiclcpp/ParameterSet.h"
00016
00017 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
00018 #include "artdaq-core/Data/Fragment.hh"
00019 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
00020 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00021 #include "artdaq-core/Utilities/TimeUtils.hh"
00022
00023 #include <fstream>
00024 #include <iomanip>
00025 #include <iterator>
00026 #include <iostream>
00027 #include <iomanip>
00028 #include <algorithm>
00029 #include <sys/poll.h>
00030 #include "artdaq/DAQdata/TCPConnect.hh"
00031
00032 artdaq::RequestReceiver::RequestReceiver()
00033 : request_port_(3001)
00034 , request_addr_("227.128.12.26")
00035 , running_(false)
00036 , requests_()
00037 , request_timing_()
00038 , request_stop_requested_(false)
00039 , request_received_(false)
00040 , end_of_run_timeout_ms_(1000)
00041 , should_stop_(false)
00042 , highest_seen_request_(0)
00043 {}
00044
00045 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps)
00046 : request_port_(ps.get<int>("request_port", 3001))
00047 , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
00048 , running_(false)
00049 , requests_()
00050 , request_timing_()
00051 , request_stop_requested_(false)
00052 , request_received_(false)
00053 , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
00054 , should_stop_(false)
00055 , highest_seen_request_(0)
00056 {
00057 setupRequestListener();
00058 }
00059
00060 void artdaq::RequestReceiver::setupRequestListener()
00061 {
00062 TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_ ;
00063 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
00064 if (request_socket_ < 0)
00065 {
00066 TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno) ;
00067 exit(1);
00068 }
00069
00070 struct sockaddr_in si_me_request;
00071
00072 int yes = 1;
00073 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
00074 {
00075 TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno) ;
00076 exit(1);
00077 }
00078 memset(&si_me_request, 0, sizeof(si_me_request));
00079 si_me_request.sin_family = AF_INET;
00080 si_me_request.sin_port = htons(request_port_);
00081 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
00082 if (bind(request_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
00083 {
00084 TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno) ;
00085 exit(1);
00086 }
00087
00088 if (request_addr_ != "localhost")
00089 {
00090 struct ip_mreq mreq;
00091 int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
00092 if (sts == -1)
00093 {
00094 TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno) ;
00095 exit(1);
00096 }
00097 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
00098 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
00099 {
00100 TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno) ;
00101 exit(1);
00102 }
00103 }
00104 TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank ;
00105 }
00106
00107 artdaq::RequestReceiver::~RequestReceiver()
00108 {
00109 stopRequestReceiverThread();
00110 }
00111
00112 void artdaq::RequestReceiver::stopRequestReceiverThread()
00113 {
00114 std::unique_lock<std::mutex> lk(state_mutex_);
00115 if (!request_received_)
00116 {
00117 TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
00118 << "Check that UDP port " << request_port_ << " is open in the firewall config.";
00119 }
00120 should_stop_ = true;
00121 TLOG(TLVL_DEBUG) << "Joining requestThread";
00122 if (requestThread_.joinable()) requestThread_.join();
00123 while (running_) usleep(10000);
00124
00125 if (request_socket_ != -1) {
00126 close(request_socket_);
00127 request_socket_ = -1;
00128 }
00129 request_received_ = false;
00130 highest_seen_request_ = 0;
00131 }
00132
00133 void artdaq::RequestReceiver::startRequestReceiverThread()
00134 {
00135 std::unique_lock<std::mutex> lk(state_mutex_);
00136 if (requestThread_.joinable()) requestThread_.join();
00137 should_stop_ = false;
00138 request_stop_requested_ = false;
00139
00140 if (request_socket_ == -1) {
00141 TLOG(TLVL_INFO) << "Connecting Request Reception socket";
00142 setupRequestListener();
00143 }
00144
00145 TLOG(TLVL_INFO) << "Starting Request Reception Thread" ;
00146 requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
00147 running_ = true;
00148 }
00149
00150 void artdaq::RequestReceiver::receiveRequestsLoop()
00151 {
00152 while (!should_stop_)
00153 {
00154 TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests" ;
00155
00156 int ms_to_wait = 10;
00157 struct pollfd ufds[1];
00158 ufds[0].fd = request_socket_;
00159 ufds[0].events = POLLIN | POLLPRI;
00160 int rv = poll(ufds, 1, ms_to_wait);
00161
00162
00163 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
00164 {
00165 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
00166 {
00167 break;
00168 }
00169 continue;
00170 }
00171
00172 TLOG(11) << "Recieved packet on Request channel" ;
00173 artdaq::detail::RequestHeader hdr_buffer;
00174 recv(request_socket_, &hdr_buffer, sizeof(hdr_buffer), 0);
00175 TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer.header ;
00176 if (!hdr_buffer.isValid()) continue;
00177
00178 request_received_ = true;
00179 if (hdr_buffer.mode == artdaq::detail::RequestMessageMode::EndOfRun)
00180 {
00181 TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests..." ;
00182 request_stop_timeout_ = std::chrono::steady_clock::now();
00183 request_stop_requested_ = true;
00184 }
00185
00186 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer.packet_count);
00187 recv(request_socket_, &pkt_buffer[0], sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count, 0);
00188 bool anyNew = false;
00189
00190 if (should_stop_) break;
00191
00192 for (auto& buffer : pkt_buffer)
00193 {
00194 if (!buffer.isValid()) continue;
00195 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
00196 {
00197 TLOG(TLVL_ERROR) << "Received conflicting request for SeqID "
00198 << std::to_string(buffer.sequence_id) << "!"
00199 << " Old ts=" << std::to_string(requests_[buffer.sequence_id])
00200 << ", new ts=" << std::to_string(buffer.timestamp) << ". Keeping OLD!" ;
00201 }
00202 else if (!requests_.count(buffer.sequence_id))
00203 {
00204 int delta = buffer.sequence_id - highest_seen_request_;
00205 TLOG(11) << "Recieved request for sequence ID " << std::to_string(buffer.sequence_id)
00206 << " and timestamp " << std::to_string(buffer.timestamp) << " (delta: " << delta << ")" ;
00207 if (delta < 0)
00208 {
00209 TLOG(11) << "Already serviced this request! Ignoring..." ;
00210 }
00211 else
00212 {
00213 std::unique_lock<std::mutex> tlk(request_mutex_);
00214 requests_[buffer.sequence_id] = buffer.timestamp;
00215 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
00216 anyNew = true;
00217 }
00218 }
00219 }
00220 if (anyNew) {
00221 request_cv_.notify_all();
00222 }
00223 }
00224 TLOG(TLVL_DEBUG) << "Ending Request Thread";
00225 running_ = false;
00226 }
00227
00228 void artdaq::RequestReceiver::RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
00229 {
00230 std::unique_lock<std::mutex> lk(request_mutex_);
00231 requests_.erase(reqID);
00232 if (reqID > highest_seen_request_ && !should_stop_)
00233 {
00234 TLOG(18) << "Setting highest_seen_request_ to " << reqID;
00235 highest_seen_request_ = reqID;
00236 }
00237
00238 if (metricMan)
00239 {
00240 metricMan->sendMetric("Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]), "seconds", 2, MetricMode::Average);
00241 }
00242 request_timing_.erase(reqID);
00243 }