1 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
2 #include "artdaq/DAQdata/Globals.hh"
4 #include "artdaq/DAQrate/RequestReceiver.hh"
5 #include "artdaq/DAQdata/Globals.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
18 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
21 #include "artdaq-core/Utilities/ExceptionHandler.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
31 #include <arpa/inet.h>
32 #include <netinet/in.h>
// Fragment of a RequestReceiver constructor's member-initializer list; judging by
// the literal defaults this is presumably the default constructor (its header and
// several initializers are not visible in this view -- TODO confirm).
// Visible defaults: request address "227.128.12.26", stop/received flags cleared,
// end_of_run_timeout_ms_ = 1000, no requests seen yet, request increment of 1.
37 , request_addr_(
"227.128.12.26")
41 , request_stop_requested_(false)
42 , request_received_(false)
43 , end_of_run_timeout_ms_(1000)
45 , highest_seen_request_(0)
46 , out_of_order_requests_()
47 , request_increment_(1)
// Fragment of the member-initializer list of a constructor that reads its
// configuration from `ps` (constructor header not visible in this view).
// Each ps.get<T>(name, default) falls back to the listed default when the
// parameter is absent:
//   "request_port"                 -> request_port_          (default 3001)
//   "request_address"              -> request_addr_          (default "227.128.12.26")
//   "multicast_interface_ip"       -> multicast_out_addr_    (default "0.0.0.0")
//   "end_of_run_quiet_timeout_ms"  -> end_of_run_timeout_ms_ (default 1000)
//   "request_increment"            -> request_increment_     (default 1)
51 : request_port_(ps.get<int>(
"request_port", 3001))
52 , request_addr_(ps.get<std::string>(
"request_address",
"227.128.12.26"))
53 , multicast_out_addr_(ps.get<std::string>(
"multicast_interface_ip",
"0.0.0.0"))
// State flags start cleared; they are set later by the receive loop.
57 , request_stop_requested_(false)
58 , request_received_(false)
59 , end_of_run_timeout_ms_(ps.get<size_t>(
"end_of_run_quiet_timeout_ms", 1000))
61 , highest_seen_request_(0)
62 , out_of_order_requests_()
63 , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>(
"request_increment", 1))
// Body fragment that creates and configures request_socket_ (presumably
// setupRequestListener(); the function header, braces and the statements that
// follow each error log are not visible in this view).
// Visible steps: create a UDP socket, enable SO_REUSEADDR, bind to
// INADDR_ANY:request_port_, and -- unless request_addr_ is "localhost" --
// join the multicast group given by request_addr_.
70 TLOG(TLVL_INFO) <<
"Setting up request listen socket, rank=" << my_rank <<
", address=" << request_addr_ <<
":" << request_port_;
71 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
// socket() returns a negative value on failure; errno describes the cause.
72 if (request_socket_ < 0)
74 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving data requests! err=" << strerror(errno);
78 struct sockaddr_in si_me_request;
// `yes` is declared on a line not shown here; it is the option value passed to
// SO_REUSEADDR.
81 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
83 TLOG(TLVL_ERROR) <<
"Unable to enable port reuse on request socket, err=" << strerror(errno);
// Zero the address structure, then bind to every local interface on
// request_port_ (port and address converted to network byte order).
86 memset(&si_me_request, 0,
sizeof(si_me_request));
87 si_me_request.sin_family = AF_INET;
88 si_me_request.sin_port = htons(request_port_);
89 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
90 if (bind(request_socket_, (
struct sockaddr *)&si_me_request,
sizeof(si_me_request)) == -1)
92 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << request_port_ <<
", err=" << strerror(errno);
// Multicast membership is only set up when the request address is not the
// literal string "localhost".
96 if (request_addr_ !=
"localhost")
// Resolve the multicast group address into mreq.imr_multiaddr (mreq is
// declared on a line not shown here).
99 int sts =
ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
102 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast request address, err=" << strerror(errno);
// The call that resolves multicast_out_addr_ is on lines not visible here;
// only its failure log is shown.
108 TLOG(TLVL_ERROR) <<
"Unable to resolve hostname for " << multicast_out_addr_;
111 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
113 TLOG(TLVL_ERROR) <<
"Unable to join multicast group, err=" << strerror(errno);
117 TLOG(TLVL_INFO) <<
"Done setting up request socket, rank=" << my_rank;
// Destructor: stops the request receiver thread. Passing force=true skips the
// "no requests have ever been received" error that stopRequestReceiverThread
// otherwise logs when request_received_ is still false.
120 artdaq::RequestReceiver::~RequestReceiver()
122 stopRequestReceiverThread(
true);
// Body fragment that shuts the receiver thread down (presumably
// stopRequestReceiverThread(bool force); header, braces and several statements
// are not visible in this view).
// state_mutex_ is held for the visible part of the shutdown sequence.
127 std::unique_lock<std::mutex> lk(state_mutex_);
// A non-forced stop with no request ever received usually indicates a
// connectivity problem, so it is reported as an error.
128 if (!request_received_ && !force)
130 TLOG(TLVL_ERROR) <<
"Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
131 <<
"Check that UDP port " << request_port_ <<
" is open in the firewall config.";
135 TLOG(TLVL_DEBUG) <<
"Joining requestThread";
// Wait for the receive thread to finish, if one was started.
136 if (requestThread_.joinable()) requestThread_.join();
// `once` is declared on a line not shown; presumably it limits this message to
// a single emission inside a wait loop -- TODO confirm against the full source.
139 if (once) TLOG(TLVL_ERROR) <<
"running_ is true after thread join! Should NOT happen";
// Close the listen socket and mark it closed with the -1 sentinel.
145 if (request_socket_ != -1)
147 close(request_socket_);
148 request_socket_ = -1;
// Reset per-run state.
150 request_received_ =
false;
151 highest_seen_request_ = 0;
// Body fragment that (re)starts the receiver thread (presumably
// startRequestReceiverThread(); header, braces, the try block and the thread
// construction itself are not visible in this view).
156 std::unique_lock<std::mutex> lk(state_mutex_);
// Join any previous thread first. Note that state_mutex_ is held while joining.
157 if (requestThread_.joinable()) requestThread_.join();
// Clear the stop flags.
158 should_stop_ =
false;
159 request_stop_requested_ =
false;
// Open the listen socket if it is not currently open (-1 means closed).
161 if (request_socket_ == -1)
163 TLOG(TLVL_INFO) <<
"Connecting Request Reception socket";
164 setupRequestListener();
167 TLOG(TLVL_INFO) <<
"Starting Request Reception Thread";
// Failure to start the thread is reported both through TLOG and on stderr;
// what happens after logging is on lines not shown here.
171 catch (
const boost::exception& e)
173 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
174 std::cerr <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
// Body fragment of the request-receiving loop (presumably receiveRequestsLoop();
// header, braces and several statements are not visible in this view).
// Each iteration polls request_socket_, reads one datagram, validates its
// header, and records each valid, not-yet-serviced request packet in requests_.
182 while (!should_stop_)
184 TLOG(16) <<
"receiveRequestsLoop: Polling Request socket for new requests";
// Re-open the socket if a previous error closed it.
186 if (request_socket_ == -1)
188 setupRequestListener();
// Wait up to ms_to_wait (declared on a line not shown) for readable data.
192 struct pollfd ufds[1];
193 ufds[0].fd = request_socket_;
194 ufds[0].events = POLLIN | POLLPRI | POLLERR;
195 int rv = poll(ufds, 1, ms_to_wait);
// No-data path: timeout/error from poll(), or revents is not exactly POLLIN
// and not exactly POLLPRI.
// NOTE(review): this is an equality test, so a combination such as
// POLLIN|POLLPRI also takes this path -- confirm that is intended.
198 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
// An error/hangup/invalid-fd condition closes the socket; the check at the
// top of the loop re-opens it on a later iteration.
200 if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
202 close(request_socket_);
203 request_socket_ = -1;
// Once an end-of-run request has been seen, test whether the channel has been
// quiet for longer than end_of_run_timeout_ms_ (the action taken is on lines
// not shown here).
205 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
212 TLOG(11) <<
"Received packet on Request channel";
// Read one datagram (at most MAX_REQUEST_MESSAGE_SIZE bytes) and record the
// sender address in `from`.
213 std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
214 struct sockaddr_in from;
215 socklen_t len =
sizeof(from);
216 auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, (
struct sockaddr*)&from, &len);
// Receive-failure handling (the condition itself is on a line not shown):
// log, then close the socket so it is re-created.
219 TLOG(TLVL_ERROR) <<
"Error receiving request message header err=" << strerror(errno);
220 close(request_socket_);
221 request_socket_ = -1;
// hdr_buffer is declared on a line not shown; presumably it views the start of
// `buffer` as a request header -- TODO confirm.
// NOTE(review): from.sin_port is streamed without ntohs(), so the logged port
// appears to be in network byte order.
226 TLOG(11) <<
"Request header word: 0x" << std::hex << hdr_buffer->
header <<
", packet_count: " << hdr_buffer->packet_count <<
" from rank " << hdr_buffer->rank <<
", " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port;
// Ignore datagrams whose header fails validation.
227 if (!hdr_buffer->isValid())
continue;
229 request_received_ =
true;
// End-of-run handling (the condition that selects it is on a line not shown):
// (re)start the quiet-period timer.
// NOTE(review): the message says "1-second" but the timeout compared above is
// the configurable end_of_run_timeout_ms_.
232 TLOG(TLVL_INFO) <<
"Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
233 request_stop_timeout_ = std::chrono::steady_clock::now();
234 request_stop_requested_ =
true;
// One RequestPacket slot per packet announced in the header; the code that
// fills pkt_buffer is on lines not shown here.
237 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
241 if (should_stop_)
break;
// The loop variable `buffer` shadows the byte vector declared above.
243 for (
auto& buffer : pkt_buffer)
245 TLOG(20) <<
"Request Packet: hdr=" << buffer.header <<
", seq=" << buffer.sequence_id <<
", ts=" << buffer.timestamp;
246 if (!buffer.isValid())
continue;
// requests_ and request_timing_ are modified under request_mutex_.
247 std::unique_lock<std::mutex> tlk(request_mutex_);
// Same sequence ID already stored with a different timestamp: keep the
// stored entry and report the conflict.
248 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
250 TLOG(TLVL_ERROR) <<
"Received conflicting request for SeqID "
251 << buffer.sequence_id <<
"!"
252 <<
" Old ts=" << requests_[buffer.sequence_id]
253 <<
", new ts=" << buffer.timestamp <<
". Keeping OLD!";
// Sequence ID not currently stored: decide whether it is new or already
// serviced.
255 else if (!requests_.count(buffer.sequence_id))
// NOTE(review): the difference is narrowed to int; presumably sequence IDs are
// wider unsigned integers, so very large gaps could wrap -- TODO confirm.
257 int delta = buffer.sequence_id - highest_seen_request_;
258 TLOG(11) <<
"Received request for sequence ID " << buffer.sequence_id
259 <<
" and timestamp " << buffer.timestamp <<
" (delta: " << delta <<
")";
// At or below highest_seen_request_, or recorded in out_of_order_requests_,
// means the request was already handled.
260 if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
262 TLOG(11) <<
"Already serviced this request ( sequence ID " << buffer.sequence_id <<
")! Ignoring...";
// New request: store its timestamp and the arrival time (read back by the
// "Request Response Time" metric when the request is removed).
266 requests_[buffer.sequence_id] = buffer.timestamp;
267 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
// Wake every thread waiting on request_cv_.
274 request_cv_.notify_all();
277 TLOG(TLVL_DEBUG) <<
"Ending Request Thread";
// Body fragment that removes a serviced request (presumably
// RemoveRequest(reqID); header, braces and some statements are not visible in
// this view).
// Erases reqID from requests_, advances highest_seen_request_ (tracking IDs
// that complete out of order), and reports the request's response time.
283 TLOG(10) <<
"RemoveRequest: Removing request for id " << reqID;
284 std::unique_lock<std::mutex> lk(request_mutex_);
285 requests_.erase(reqID);
// Only IDs above the current high-water mark can change the bookkeeping.
287 if (reqID > highest_seen_request_)
289 TLOG(10) <<
"RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() <<
", reqID=" << reqID <<
", expected=" << highest_seen_request_ + request_increment_;
// If earlier out-of-order IDs are pending, or reqID is not the next expected
// ID, remember it in out_of_order_requests_.
290 if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
292 out_of_order_requests_.insert(reqID);
// Walk the out-of-order set and absorb every entry that is now the next
// expected ID, advancing highest_seen_request_ as entries are erased (the
// handling of non-matching entries is on lines not shown here).
294 auto it = out_of_order_requests_.begin();
295 while (it != out_of_order_requests_.end() && !should_stop_)
297 if (*it == highest_seen_request_ + request_increment_)
299 highest_seen_request_ = *it;
300 it = out_of_order_requests_.erase(it);
// Otherwise (presumably the in-order case, on a branch whose header is not
// shown) reqID becomes the new high-water mark.
310 highest_seen_request_ = reqID;
312 TLOG(10) <<
"RemoveRequest: reqID=" << reqID <<
" Setting highest_seen_request_ to " << highest_seen_request_;
// Report the time since the request's recorded arrival, when a metric manager
// exists and an arrival time was stored for reqID.
314 if (metricMan && request_timing_.count(reqID))
316 metricMan->sendMetric(
"Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]),
"seconds", 2, MetricMode::Average);
318 request_timing_.erase(reqID);
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
End of Run mode (Used to end request processing on receiver)
void startRequestReceiverThread()
Function that launches the data request receiver thread (receiveRequestsLoop())
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
void stopRequestReceiverThread(bool force=false)
Stop the data request receiver thread (receiveRequestsLoop)
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map