1 #define TRACE_NAME "RequestReceiver"
3 #include "artdaq/DAQrate/RequestReceiver.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/detail/RequestMessage.hh"
7 #include <boost/exception/all.hpp>
8 #include <boost/throw_exception.hpp>
13 #include "canvas/Utilities/Exception.h"
14 #include "cetlib_except/exception.h"
15 #include "fhiclcpp/ParameterSet.h"
17 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
18 #include "artdaq-core/Data/Fragment.hh"
19 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/TimeUtils.hh"
34 , request_addr_(
"227.128.12.26")
38 , request_stop_requested_(false)
39 , request_received_(false)
40 , end_of_run_timeout_ms_(1000)
42 , highest_seen_request_(0)
43 , out_of_order_requests_()
44 , request_increment_(1)
48 : request_port_(ps.get<int>(
"request_port", 3001))
49 , request_addr_(ps.get<std::string>(
"request_address",
"227.128.12.26"))
50 , multicast_out_addr_(ps.get<std::string>(
"multicast_interface_ip",
"0.0.0.0"))
54 , request_stop_requested_(false)
55 , request_received_(false)
56 , end_of_run_timeout_ms_(ps.get<size_t>(
"end_of_run_quiet_timeout_ms", 1000))
58 , highest_seen_request_(0)
59 , out_of_order_requests_()
60 , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>(
"request_increment", 1))
67 TLOG(TLVL_INFO) <<
"Setting up request listen socket, rank=" << my_rank <<
", address=" << request_addr_ <<
":" << request_port_;
68 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
69 if (request_socket_ < 0)
71 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving data requests! err=" << strerror(errno);
75 struct sockaddr_in si_me_request;
78 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
80 TLOG(TLVL_ERROR) <<
"Unable to enable port reuse on request socket, err=" << strerror(errno);
83 memset(&si_me_request, 0,
sizeof(si_me_request));
84 si_me_request.sin_family = AF_INET;
85 si_me_request.sin_port = htons(request_port_);
86 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
87 if (bind(request_socket_, (
struct sockaddr *)&si_me_request,
sizeof(si_me_request)) == -1)
89 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << request_port_ <<
", err=" << strerror(errno);
93 if (request_addr_ !=
"localhost")
96 int sts =
ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
99 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast request address, err=" << strerror(errno);
105 TLOG(TLVL_ERROR) <<
"Unable to resolve hostname for " << multicast_out_addr_;
108 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
110 TLOG(TLVL_ERROR) <<
"Unable to join multicast group, err=" << strerror(errno);
114 TLOG(TLVL_INFO) <<
"Done setting up request socket, rank=" << my_rank;
// Destructor: shuts down the request receiver thread by calling
// stopRequestReceiverThread() with force=true. With force set, the
// "no requests have ever been received" error in that function is not logged.
117 artdaq::RequestReceiver::~RequestReceiver()
119 stopRequestReceiverThread(
true);
124 std::unique_lock<std::mutex> lk(state_mutex_);
125 if (!request_received_ && !force)
127 TLOG(TLVL_ERROR) <<
"Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
128 <<
"Check that UDP port " << request_port_ <<
" is open in the firewall config.";
131 TLOG(TLVL_DEBUG) <<
"Joining requestThread";
132 if (requestThread_.joinable()) requestThread_.join();
135 if (once) TLOG(TLVL_ERROR) <<
"running_ is true after thread join! Should NOT happen";
140 if (request_socket_ != -1)
142 close(request_socket_);
143 request_socket_ = -1;
145 request_received_ =
false;
146 highest_seen_request_ = 0;
151 std::unique_lock<std::mutex> lk(state_mutex_);
152 if (requestThread_.joinable()) requestThread_.join();
153 should_stop_ =
false;
154 request_stop_requested_ =
false;
156 if (request_socket_ == -1)
158 TLOG(TLVL_INFO) <<
"Connecting Request Reception socket";
159 setupRequestListener();
162 TLOG(TLVL_INFO) <<
"Starting Request Reception Thread";
169 while (!should_stop_)
171 TLOG(16) <<
"receiveRequestsLoop: Polling Request socket for new requests";
173 if (request_socket_ == -1)
175 setupRequestListener();
179 struct pollfd ufds[1];
180 ufds[0].fd = request_socket_;
181 ufds[0].events = POLLIN | POLLPRI | POLLERR;
182 int rv = poll(ufds, 1, ms_to_wait);
185 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
187 if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
189 close(request_socket_);
190 request_socket_ = -1;
192 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
199 TLOG(11) <<
"Recieved packet on Request channel";
201 auto sts = recv(request_socket_, &hdr_buffer,
sizeof(hdr_buffer), 0);
204 TLOG(TLVL_ERROR) <<
"Error receiving request message header err=" << strerror(errno);
205 close(request_socket_);
206 request_socket_ = -1;
209 TLOG(11) <<
"Request header word: 0x" << std::hex << hdr_buffer.
header;
210 if (!hdr_buffer.
isValid())
continue;
212 request_received_ =
true;
215 TLOG(TLVL_INFO) <<
"Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
216 request_stop_timeout_ = std::chrono::steady_clock::now();
217 request_stop_requested_ =
true;
220 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer.
packet_count);
227 TLOG(TLVL_ERROR) <<
"Error receiving request message data err=" << strerror(errno);
228 close(request_socket_);
229 request_socket_ = -1;
237 if (should_stop_)
break;
239 for (
auto& buffer : pkt_buffer)
241 TLOG(20) <<
"Request Packet: hdr=" << buffer.header <<
", seq=" << buffer.sequence_id <<
", ts=" << buffer.timestamp;
242 if (!buffer.isValid())
continue;
243 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
245 TLOG(TLVL_ERROR) <<
"Received conflicting request for SeqID "
246 << buffer.sequence_id <<
"!"
247 <<
" Old ts=" << requests_[buffer.sequence_id]
248 <<
", new ts=" << buffer.timestamp <<
". Keeping OLD!";
250 else if (!requests_.count(buffer.sequence_id))
252 int delta = buffer.sequence_id - highest_seen_request_;
253 TLOG(11) <<
"Recieved request for sequence ID " << buffer.sequence_id
254 <<
" and timestamp " << buffer.timestamp <<
" (delta: " << delta <<
")";
255 if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
257 TLOG(11) <<
"Already serviced this request! Ignoring...";
261 std::unique_lock<std::mutex> tlk(request_mutex_);
262 requests_[buffer.sequence_id] = buffer.timestamp;
263 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
270 request_cv_.notify_all();
273 TLOG(TLVL_DEBUG) <<
"Ending Request Thread";
279 TLOG(10) <<
"RemoveRequest: Removing request with id " << reqID;
280 std::unique_lock<std::mutex> lk(request_mutex_);
281 requests_.erase(reqID);
283 if (reqID > highest_seen_request_)
285 TLOG(10) <<
"RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() <<
", reqID=" << reqID <<
", expected=" << highest_seen_request_ + request_increment_;
286 if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
288 out_of_order_requests_.insert(reqID);
290 auto it = out_of_order_requests_.begin();
291 while (it != out_of_order_requests_.end() && !should_stop_)
293 if (*it == highest_seen_request_ + request_increment_)
295 highest_seen_request_ = *it;
296 it = out_of_order_requests_.erase(it);
306 highest_seen_request_ = reqID;
308 TLOG(10) <<
"RemoveRequest: reqID=" << reqID <<
" Setting highest_seen_request_ to " << highest_seen_request_;
312 metricMan->sendMetric(
"Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]),
"seconds", 2, MetricMode::Average);
314 request_timing_.erase(reqID);
int ResolveHost(char const *host_in, in_addr &addr)
Convert a hostname string to an in_addr suitable for socket communication.
End of Run mode (Used to end request processing on receiver)
void startRequestReceiverThread()
Function that launches the data request receiver thread (receiveRequestsLoop())
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
void stopRequestReceiverThread(bool force=false)
Stop the data request receiver thread (receiveRequestsLoop)
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map