1 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
2 #include "artdaq/DAQdata/Globals.hh"
4 #include "artdaq/DAQrate/RequestReceiver.hh"
5 #include "artdaq/DAQdata/Globals.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
18 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
21 #include "artdaq-core/Utilities/ExceptionHandler.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
31 #include <arpa/inet.h>
32 #include <netinet/in.h>
37 , request_addr_(
"227.128.12.26")
42 , request_stop_requested_(false)
43 , request_received_(false)
44 , end_of_run_timeout_ms_(1000)
46 , highest_seen_request_(0)
47 , out_of_order_requests_()
48 , request_increment_(1)
52 : request_port_(ps.get<int>(
"request_port", 3001))
53 , request_addr_(ps.get<std::string>(
"request_address",
"227.128.12.26"))
54 , multicast_out_addr_(ps.get<std::string>(
"multicast_interface_ip",
"0.0.0.0"))
59 , request_stop_requested_(false)
60 , request_received_(false)
61 , end_of_run_timeout_ms_(ps.get<size_t>(
"end_of_run_quiet_timeout_ms", 1000))
63 , highest_seen_request_(0)
64 , out_of_order_requests_()
65 , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>(
"request_increment", 1))
72 TLOG(TLVL_INFO) <<
"Setting up request listen socket, rank=" << my_rank <<
", address=" << request_addr_ <<
":" << request_port_;
73 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
74 if (request_socket_ < 0)
76 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving data requests! err=" << strerror(errno);
80 struct sockaddr_in si_me_request;
83 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
85 TLOG(TLVL_ERROR) <<
"Unable to enable port reuse on request socket, err=" << strerror(errno);
88 memset(&si_me_request, 0,
sizeof(si_me_request));
89 si_me_request.sin_family = AF_INET;
90 si_me_request.sin_port = htons(request_port_);
91 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
92 if (bind(request_socket_, (
struct sockaddr *)&si_me_request,
sizeof(si_me_request)) == -1)
94 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << request_port_ <<
", err=" << strerror(errno);
98 if (request_addr_ !=
"localhost")
101 int sts =
ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
104 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast request address, err=" << strerror(errno);
110 TLOG(TLVL_ERROR) <<
"Unable to resolve hostname for " << multicast_out_addr_;
113 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
115 TLOG(TLVL_ERROR) <<
"Unable to join multicast group, err=" << strerror(errno);
119 TLOG(TLVL_INFO) <<
"Done setting up request socket, rank=" << my_rank;
122 artdaq::RequestReceiver::~RequestReceiver()
124 stopRequestReception(
true);
129 std::unique_lock<std::mutex> lk(state_mutex_);
130 if (!request_received_ && !force)
132 TLOG(TLVL_ERROR) <<
"Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
133 <<
"Check that UDP port " << request_port_ <<
" is open in the firewall config.";
137 TLOG(TLVL_DEBUG) <<
"Joining requestThread";
138 if (requestThread_.joinable()) requestThread_.join();
141 if (once) TLOG(TLVL_ERROR) <<
"running_ is true after thread join! Should NOT happen";
147 if (request_socket_ != -1)
149 close(request_socket_);
150 request_socket_ = -1;
152 request_received_ =
false;
153 highest_seen_request_ = 0;
158 std::unique_lock<std::mutex> lk(state_mutex_);
159 if (requestThread_.joinable()) requestThread_.join();
160 should_stop_ =
false;
161 request_stop_requested_ =
false;
163 if (request_socket_ == -1)
165 TLOG(TLVL_INFO) <<
"Connecting Request Reception socket";
166 setupRequestListener();
169 TLOG(TLVL_INFO) <<
"Starting Request Reception Thread";
173 catch (
const boost::exception& e)
175 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
176 std::cerr <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
184 while (!should_stop_)
186 TLOG(16) <<
"receiveRequestsLoop: Polling Request socket for new requests";
188 if (request_socket_ == -1)
190 setupRequestListener();
194 struct pollfd ufds[1];
195 ufds[0].fd = request_socket_;
196 ufds[0].events = POLLIN | POLLPRI | POLLERR;
197 int rv = poll(ufds, 1, ms_to_wait);
200 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
202 if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
204 close(request_socket_);
205 request_socket_ = -1;
207 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
214 TLOG(11) <<
"Received packet on Request channel";
215 std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
216 struct sockaddr_in from;
217 socklen_t len =
sizeof(from);
218 auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, (
struct sockaddr*)&from, &len);
221 TLOG(TLVL_ERROR) <<
"Error receiving request message header err=" << strerror(errno);
222 close(request_socket_);
223 request_socket_ = -1;
228 TLOG(11) <<
"Request header word: 0x" << std::hex << hdr_buffer->
header << std::dec <<
", packet_count: " << hdr_buffer->packet_count <<
" from rank " << hdr_buffer->rank <<
", " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port <<
", run number: " << hdr_buffer->run_number;
229 if (!hdr_buffer->isValid())
continue;
231 request_received_ =
true;
234 if (run_number_ != 0 && hdr_buffer->run_number != run_number_)
236 TLOG(TLVL_WARNING) <<
"Received a Request Message with the wrong run number ("
237 << hdr_buffer->run_number <<
"), expected " << run_number_
238 <<
", ignoring this request.";
244 TLOG(TLVL_INFO) <<
"Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
245 request_stop_timeout_ = std::chrono::steady_clock::now();
246 request_stop_requested_ =
true;
249 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
253 if (should_stop_)
break;
255 for (
auto& buffer : pkt_buffer)
257 TLOG(20) <<
"Request Packet: hdr=" << buffer.header <<
", seq=" << buffer.sequence_id <<
", ts=" << buffer.timestamp;
258 if (!buffer.isValid())
continue;
259 std::unique_lock<std::mutex> tlk(request_mutex_);
260 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
262 TLOG(TLVL_ERROR) <<
"Received conflicting request for SeqID "
263 << buffer.sequence_id <<
"!"
264 <<
" Old ts=" << requests_[buffer.sequence_id]
265 <<
", new ts=" << buffer.timestamp <<
". Keeping OLD!";
267 else if (!requests_.count(buffer.sequence_id))
269 int delta = buffer.sequence_id - highest_seen_request_;
270 TLOG(11) <<
"Received request for sequence ID " << buffer.sequence_id
271 <<
" and timestamp " << buffer.timestamp <<
" (delta: " << delta <<
")";
272 if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
274 TLOG(11) <<
"Already serviced this request ( sequence ID " << buffer.sequence_id <<
")! Ignoring...";
278 requests_[buffer.sequence_id] = buffer.timestamp;
279 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
286 request_cv_.notify_all();
289 TLOG(TLVL_DEBUG) <<
"Ending Request Thread";
295 TLOG(10) <<
"RemoveRequest: Removing request for id " << reqID;
296 std::unique_lock<std::mutex> lk(request_mutex_);
297 requests_.erase(reqID);
299 if (reqID > highest_seen_request_)
301 TLOG(10) <<
"RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() <<
", reqID=" << reqID <<
", expected=" << highest_seen_request_ + request_increment_;
302 if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
304 out_of_order_requests_.insert(reqID);
306 auto it = out_of_order_requests_.begin();
307 while (it != out_of_order_requests_.end() && !should_stop_)
309 if (*it == highest_seen_request_ + request_increment_)
311 highest_seen_request_ = *it;
312 it = out_of_order_requests_.erase(it);
322 highest_seen_request_ = reqID;
324 TLOG(10) <<
"RemoveRequest: reqID=" << reqID <<
" Setting highest_seen_request_ to " << highest_seen_request_;
326 if (metricMan && request_timing_.count(reqID))
328 metricMan->sendMetric(
"Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]),
"seconds", 2, MetricMode::Average);
330 request_timing_.erase(reqID);
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
void startRequestReception()
Enables (starts) the reception of data requests.
End of Run mode (Used to end request processing on receiver)
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void stopRequestReception(bool force=false)
Disables (stops) the reception of data requests.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map.