1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/RequestReceiver.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
24 #include <arpa/inet.h>
25 #include <netinet/in.h>
36 , request_addr_(
"227.128.12.26")
42 , request_stop_requested_(false)
43 , request_received_(false)
44 , end_of_run_timeout_ms_(1000)
46 , highest_seen_request_(0)
47 , last_next_request_(0)
48 , out_of_order_requests_()
49 , request_increment_(1)
53 : request_port_(ps.get<int>(
"request_port", 3001))
54 , request_addr_(ps.get<std::string>(
"request_address",
"227.128.12.26"))
55 , multicast_in_addr_(ps.get<std::string>(
"multicast_interface_ip",
"0.0.0.0"))
61 , request_stop_requested_(false)
62 , request_received_(false)
63 , end_of_run_timeout_ms_(ps.get<size_t>(
"end_of_run_quiet_timeout_ms", 1000))
65 , highest_seen_request_(0)
66 , last_next_request_(0)
67 , out_of_order_requests_()
68 , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>(
"request_increment", 1))
75 TLOG(TLVL_INFO) <<
"Setting up request listen socket, rank=" << my_rank <<
", address=" << request_addr_ <<
":" << request_port_
76 <<
", multicast interface=" << multicast_in_addr_;
77 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
78 if (request_socket_ < 0)
80 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving data requests! err=" << strerror(errno);
84 struct sockaddr_in si_me_request;
87 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
89 TLOG(TLVL_ERROR) <<
"Unable to enable port reuse on request socket, err=" << strerror(errno);
92 memset(&si_me_request, 0,
sizeof(si_me_request));
93 si_me_request.sin_family = AF_INET;
94 si_me_request.sin_port = htons(request_port_);
95 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
96 if (bind(request_socket_, (
struct sockaddr*)&si_me_request,
sizeof(si_me_request)) == -1)
98 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << request_port_ <<
", err=" << strerror(errno);
102 if (request_addr_ !=
"localhost")
105 int sts =
ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
108 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast request address, err=" << strerror(errno);
114 TLOG(TLVL_ERROR) <<
"Unable to determine the multicast network interface for " << multicast_in_addr_;
117 char addr_str[INET_ADDRSTRLEN];
118 inet_ntop(AF_INET, &(mreq.imr_interface), addr_str, INET_ADDRSTRLEN);
119 TLOG(TLVL_INFO) <<
"Successfully determined the multicast network interface for " << multicast_in_addr_ <<
": " << addr_str <<
" (RequestReceiver)";
120 if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
122 TLOG(TLVL_ERROR) <<
"Unable to join multicast group, err=" << strerror(errno);
126 TLOG(TLVL_INFO) <<
"Done setting up request socket, rank=" << my_rank;
129 artdaq::RequestReceiver::~RequestReceiver()
131 stopRequestReception(
true);
136 std::unique_lock<std::mutex> lk(state_mutex_);
137 if (!request_received_ && !force)
139 TLOG(TLVL_ERROR) <<
"Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
140 <<
"Check that UDP port " << request_port_ <<
" is open in the firewall config.";
145 TLOG(TLVL_DEBUG) <<
"Joining requestThread";
146 if (requestThread_.joinable()) requestThread_.join();
150 if (once) TLOG(TLVL_ERROR) <<
"running_ is true after thread join! Should NOT happen";
156 if (request_socket_ != -1)
158 close(request_socket_);
159 request_socket_ = -1;
161 request_received_ =
false;
162 highest_seen_request_ = 0;
163 last_next_request_ = 0;
168 std::unique_lock<std::mutex> lk(state_mutex_);
169 if (requestThread_.joinable()) requestThread_.join();
170 should_stop_ =
false;
171 request_stop_requested_ =
false;
173 if (request_socket_ == -1)
175 TLOG(TLVL_INFO) <<
"Connecting Request Reception socket";
176 setupRequestListener();
179 TLOG(TLVL_INFO) <<
"Starting Request Reception Thread";
184 catch (
const boost::exception& e)
186 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
187 std::cerr <<
"Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
195 while (!should_stop_)
197 TLOG(16) <<
"receiveRequestsLoop: Polling Request socket for new requests";
199 if (request_socket_ == -1)
201 setupRequestListener();
205 struct pollfd ufds[1];
206 ufds[0].fd = request_socket_;
207 ufds[0].events = POLLIN | POLLPRI | POLLERR;
208 int rv = poll(ufds, 1, ms_to_wait);
211 if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
213 if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
215 close(request_socket_);
216 request_socket_ = -1;
218 if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
225 TLOG(11) <<
"Received packet on Request channel";
226 std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
227 struct sockaddr_in from;
228 socklen_t len =
sizeof(from);
229 auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, (
struct sockaddr*)&from, &len);
232 TLOG(TLVL_ERROR) <<
"Error receiving request message header err=" << strerror(errno);
233 close(request_socket_);
234 request_socket_ = -1;
239 TLOG(11) <<
"Request header word: 0x" << std::hex << hdr_buffer->
header << std::dec <<
", packet_count: " << hdr_buffer->packet_count <<
" from rank " << hdr_buffer->rank <<
", " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port <<
", run number: " << hdr_buffer->run_number;
240 if (!hdr_buffer->isValid())
continue;
242 request_received_ =
true;
245 if (run_number_ != 0 && hdr_buffer->run_number != run_number_)
247 TLOG(TLVL_WARNING) <<
"Received a Request Message with the wrong run number ("
248 << hdr_buffer->run_number <<
"), expected " << run_number_
249 <<
", ignoring this request.";
255 TLOG(TLVL_INFO) <<
"Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
256 request_stop_timeout_ = std::chrono::steady_clock::now();
257 request_stop_requested_ =
true;
260 std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
264 if (should_stop_)
break;
266 for (
auto& buffer : pkt_buffer)
268 TLOG(20) <<
"Request Packet: hdr=" << buffer.header <<
", seq=" << buffer.sequence_id <<
", ts=" << buffer.timestamp;
269 if (!buffer.isValid())
continue;
270 std::unique_lock<std::mutex> tlk(request_mutex_);
271 if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
273 TLOG(TLVL_ERROR) <<
"Received conflicting request for SeqID "
274 << buffer.sequence_id <<
"!"
275 <<
" Old ts=" << requests_[buffer.sequence_id]
276 <<
", new ts=" << buffer.timestamp <<
". Keeping OLD!";
278 else if (!requests_.count(buffer.sequence_id))
280 int delta = buffer.sequence_id - highest_seen_request_;
281 TLOG(11) <<
"Received request for sequence ID " << buffer.sequence_id
282 <<
" and timestamp " << buffer.timestamp <<
" (delta: " << delta <<
")";
283 if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
285 TLOG(11) <<
"Already serviced this request ( sequence ID " << buffer.sequence_id <<
")! Ignoring...";
289 requests_[buffer.sequence_id] = buffer.timestamp;
290 request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
297 request_cv_.notify_all();
300 TLOG(TLVL_DEBUG) <<
"Ending Request Thread";
306 std::unique_lock<std::mutex> lk(request_mutex_);
308 auto it = requests_.begin();
309 while (it != requests_.end() && it->first <= last_next_request_) { ++it; }
311 if (it == requests_.end())
313 return std::make_pair<artdaq::Fragment::sequence_id_t, artdaq::Fragment::timestamp_t>(0, 0);
316 last_next_request_ = it->first;
322 TLOG(10) <<
"RemoveRequest: Removing request for id " << reqID;
323 std::unique_lock<std::mutex> lk(request_mutex_);
324 requests_.erase(reqID);
326 if (reqID > highest_seen_request_)
328 TLOG(10) <<
"RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() <<
", reqID=" << reqID <<
", expected=" << highest_seen_request_ + request_increment_;
329 if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
331 out_of_order_requests_.insert(reqID);
333 auto it = out_of_order_requests_.begin();
334 while (it != out_of_order_requests_.end() && !should_stop_)
336 if (*it == highest_seen_request_ + request_increment_)
338 highest_seen_request_ = *it;
339 it = out_of_order_requests_.erase(it);
349 highest_seen_request_ = reqID;
351 TLOG(10) <<
"RemoveRequest: reqID=" << reqID <<
" Setting highest_seen_request_ to " << highest_seen_request_;
353 if (metricMan && request_timing_.count(reqID))
355 metricMan->sendMetric(
"Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]),
"seconds", 2, MetricMode::Average);
357 request_timing_.erase(reqID);
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
void startRequestReception()
Enables (starts) the reception of data requests.
End of Run mode (Used to end request processing on receiver)
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
std::pair< artdaq::Fragment::sequence_id_t, artdaq::Fragment::timestamp_t > GetNextRequest()
Get the "next" request, i.e. the first unsatisfied request that has not already been returned by GetNextRequest.
void stopRequestReception(bool force=false)
Disables (stops) the reception of data requests.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map.