1 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
2 #include "artdaq/DAQdata/Globals.hh"
4 #include "artdaq/DAQrate/RequestReceiver.hh"
5 #include "artdaq/DAQdata/Globals.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
18 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
21 #include "artdaq-core/Utilities/ExceptionHandler.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
31 #include <arpa/inet.h>
32 #include <netinet/in.h>
37 , request_addr_(
"227.128.12.26")
43 , request_stop_requested_(false)
44 , request_received_(false)
45 , end_of_run_timeout_ms_(1000)
47 , highest_seen_request_(0)
48 , out_of_order_requests_()
49 , request_increment_(1)
53 : request_port_(ps.get<int>(
"request_port", 3001))
54 , request_addr_(ps.get<std::string>(
"request_address",
"227.128.12.26"))
55 , multicast_out_addr_(ps.get<std::string>(
"multicast_interface_ip",
"0.0.0.0"))
61 , request_stop_requested_(false)
62 , request_received_(false)
63 , end_of_run_timeout_ms_(ps.get<size_t>(
"end_of_run_quiet_timeout_ms", 1000))
65 , highest_seen_request_(0)
66 , out_of_order_requests_()
67 , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>(
"request_increment", 1))
74 TLOG(TLVL_INFO) <<
"Setting up request listen socket, rank=" << my_rank <<
", address=" << request_addr_ <<
":" << request_port_;
75 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
76 if (request_socket_ < 0)
78 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving data requests! err=" << strerror(errno);
82 struct sockaddr_in si_me_request;
85 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
87 TLOG(TLVL_ERROR) <<
"Unable to enable port reuse on request socket, err=" << strerror(errno);
90 memset(&si_me_request, 0,
sizeof(si_me_request));
91 si_me_request.sin_family = AF_INET;
92 si_me_request.sin_port = htons(request_port_);
93 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
94 if (bind(request_socket_, (
struct sockaddr *)&si_me_request,
sizeof(si_me_request)) == -1)
96 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << request_port_ <<
", err=" << strerror(errno);
100 if (request_addr_ !=
"localhost")
103 int sts =
ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
106 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast request address, err=" << strerror(errno);
112 TLOG(TLVL_ERROR) <<
"Unable to resolve hostname for " << multicast_out_addr_;
115 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
117 TLOG(TLVL_ERROR) <<
"Unable to join multicast group, err=" << strerror(errno);
121 TLOG(TLVL_INFO) <<
"Done setting up request socket, rank=" << my_rank;
124 artdaq::RequestReceiver::~RequestReceiver()
126 stopRequestReception(
true);
131 std::unique_lock<std::mutex> lk(state_mutex_);
132 if (!request_received_ && !force)
134 TLOG(TLVL_ERROR) <<
"Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
135 <<
"Check that UDP port " << request_port_ <<
" is open in the firewall config.";
139 TLOG(TLVL_DEBUG) <<
"Joining requestThread";
140 if (requestThread_.joinable()) requestThread_.join();
143 if (once) TLOG(TLVL_ERROR) <<
"running_ is true after thread join! Should NOT happen";
149 if (request_socket_ != -1)
151 close(request_socket_);
152 request_socket_ = -1;
154 request_received_ =
false;
155 highest_seen_request_ = 0;
160 std::unique_lock<std::mutex> lk(state_mutex_);
161 if (requestThread_.joinable()) requestThread_.join();
162 should_stop_ =
false;
163 request_stop_requested_ =
false;
165 if (request_socket_ == -1)
167 TLOG(TLVL_INFO) <<
"Connecting Request Reception socket";
168 setupRequestListener();
171 TLOG(TLVL_INFO) <<
"Starting Request Reception Thread";
175 catch (
const boost::exception& e)
177 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
178 std::cerr <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
186 while (!should_stop_)
188 TLOG(16) <<
"receiveRequestsLoop: Polling Request socket for new requests";
190 if (request_socket_ == -1)
192 setupRequestListener();
196 struct pollfd ufds[1];
197 ufds[0].fd = request_socket_;
198 ufds[0].events = POLLIN | POLLPRI | POLLERR;
199 int rv = poll(ufds, 1, ms_to_wait);
202 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
204 if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
206 close(request_socket_);
207 request_socket_ = -1;
209 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
216 TLOG(11) <<
"Received packet on Request channel";
217 std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
218 struct sockaddr_in from;
219 socklen_t len =
sizeof(from);
220 auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, (
struct sockaddr*)&from, &len);
223 TLOG(TLVL_ERROR) <<
"Error receiving request message header err=" << strerror(errno);
224 close(request_socket_);
225 request_socket_ = -1;
230 TLOG(11) <<
"Request header word: 0x" << std::hex << hdr_buffer->
header << std::dec <<
", packet_count: " << hdr_buffer->packet_count <<
" from rank " << hdr_buffer->rank <<
", " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port <<
", run number: " << hdr_buffer->run_number;
231 if (!hdr_buffer->isValid())
continue;
233 request_received_ =
true;
236 if (run_number_ != 0 && hdr_buffer->run_number != run_number_)
238 TLOG(TLVL_WARNING) <<
"Received a Request Message with the wrong run number ("
239 << hdr_buffer->run_number <<
"), expected " << run_number_
240 <<
", ignoring this request.";
246 TLOG(TLVL_INFO) <<
"Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
247 request_stop_timeout_ = std::chrono::steady_clock::now();
248 request_stop_requested_ =
true;
251 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
255 if (should_stop_)
break;
257 for (
auto& buffer : pkt_buffer)
259 TLOG(20) <<
"Request Packet: hdr=" << buffer.header <<
", seq=" << buffer.sequence_id <<
", ts=" << buffer.timestamp;
260 if (!buffer.isValid())
continue;
261 std::unique_lock<std::mutex> tlk(request_mutex_);
262 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
264 TLOG(TLVL_ERROR) <<
"Received conflicting request for SeqID "
265 << buffer.sequence_id <<
"!"
266 <<
" Old ts=" << requests_[buffer.sequence_id]
267 <<
", new ts=" << buffer.timestamp <<
". Keeping OLD!";
269 else if (!requests_.count(buffer.sequence_id))
271 int delta = buffer.sequence_id - highest_seen_request_;
272 TLOG(11) <<
"Received request for sequence ID " << buffer.sequence_id
273 <<
" and timestamp " << buffer.timestamp <<
" (delta: " << delta <<
")";
274 if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
276 TLOG(11) <<
"Already serviced this request ( sequence ID " << buffer.sequence_id <<
")! Ignoring...";
280 requests_[buffer.sequence_id] = buffer.timestamp;
281 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
288 request_cv_.notify_all();
291 TLOG(TLVL_DEBUG) <<
"Ending Request Thread";
297 TLOG(10) <<
"RemoveRequest: Removing request for id " << reqID;
298 std::unique_lock<std::mutex> lk(request_mutex_);
299 requests_.erase(reqID);
301 if (reqID > highest_seen_request_)
303 TLOG(10) <<
"RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() <<
", reqID=" << reqID <<
", expected=" << highest_seen_request_ + request_increment_;
304 if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
306 out_of_order_requests_.insert(reqID);
308 auto it = out_of_order_requests_.begin();
309 while (it != out_of_order_requests_.end() && !should_stop_)
311 if (*it == highest_seen_request_ + request_increment_)
313 highest_seen_request_ = *it;
314 it = out_of_order_requests_.erase(it);
324 highest_seen_request_ = reqID;
326 TLOG(10) <<
"RemoveRequest: reqID=" << reqID <<
" Setting highest_seen_request_ to " << highest_seen_request_;
328 if (metricMan && request_timing_.count(reqID))
330 metricMan->sendMetric(
"Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]),
"seconds", 2, MetricMode::Average);
332 request_timing_.erase(reqID);
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
void startRequestReception()
Enables (starts) the reception of data requests.
End of Run mode (Used to end request processing on receiver)
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void stopRequestReception(bool force=false)
Disables (stops) the reception of data requests.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map.