artdaq  v3_06_01
RequestReceiver.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
3 
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/RequestReceiver.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
7 
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
10 
11 #include <iterator>
12 #include <limits>
13 
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
17 
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
23 
24 #include <arpa/inet.h>
25 #include <netinet/in.h>
26 #include <sys/poll.h>
27 #include <algorithm>
28 #include <fstream>
29 #include <iomanip>
30 #include <iostream>
31 #include <iterator>
33 
35  : request_port_(3001)
36  , request_addr_("227.128.12.26")
37  , running_(false)
38  , run_number_(0)
39  , request_socket_(-1)
40  , requests_()
41  , request_timing_()
42  , request_stop_requested_(false)
43  , request_received_(false)
44  , end_of_run_timeout_ms_(1000)
45  , should_stop_(false)
46  , highest_seen_request_(0)
47  , out_of_order_requests_()
48  , request_increment_(1)
49 {}
50 
51 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps)
52  : request_port_(ps.get<int>("request_port", 3001))
53  , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
54  , multicast_in_addr_(ps.get<std::string>("multicast_interface_ip", "0.0.0.0"))
55  , running_(false)
56  , run_number_(0)
57  , request_socket_(-1)
58  , requests_()
59  , request_timing_()
60  , request_stop_requested_(false)
61  , request_received_(false)
62  , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
63  , should_stop_(false)
64  , highest_seen_request_(0)
65  , out_of_order_requests_()
66  , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>("request_increment", 1))
67 {
69 }
70 
72 {
73  TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_
74  << ", multicast interface=" << multicast_in_addr_;
75  request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
76  if (request_socket_ < 0)
77  {
78  TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno);
79  exit(1);
80  }
81 
82  struct sockaddr_in si_me_request;
83 
84  int yes = 1;
85  if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
86  {
87  TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno);
88  exit(1);
89  }
90  memset(&si_me_request, 0, sizeof(si_me_request));
91  si_me_request.sin_family = AF_INET;
92  si_me_request.sin_port = htons(request_port_);
93  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
94  if (bind(request_socket_, (struct sockaddr*)&si_me_request, sizeof(si_me_request)) == -1)
95  {
96  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno);
97  exit(1);
98  }
99 
100  if (request_addr_ != "localhost")
101  {
102  struct ip_mreq mreq;
103  int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
104  if (sts == -1)
105  {
106  TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno);
107  exit(1);
108  }
109  sts = GetInterfaceForNetwork(multicast_in_addr_.c_str(), mreq.imr_interface);
110  if (sts == -1)
111  {
112  TLOG(TLVL_ERROR) << "Unable to resolve hostname for " << multicast_in_addr_;
113  exit(1);
114  }
115  if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
116  {
117  TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno);
118  exit(1);
119  }
120  }
121  TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank;
122 }
123 
124 artdaq::RequestReceiver::~RequestReceiver()
125 {
126  stopRequestReception(true);
127 }
128 
130 {
131  std::unique_lock<std::mutex> lk(state_mutex_);
132  if (!request_received_ && !force)
133  {
134  TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
135  << "Check that UDP port " << request_port_ << " is open in the firewall config.";
136  }
137  should_stop_ = true;
138  if (running_)
139  {
140  TLOG(TLVL_DEBUG) << "Joining requestThread";
141  if (requestThread_.joinable()) requestThread_.join();
142  bool once = true;
143  while (running_)
144  {
145  if (once) TLOG(TLVL_ERROR) << "running_ is true after thread join! Should NOT happen";
146  once = false;
147  usleep(10000);
148  }
149  }
150 
151  if (request_socket_ != -1)
152  {
153  close(request_socket_);
154  request_socket_ = -1;
155  }
156  request_received_ = false;
157  highest_seen_request_ = 0;
158 }
159 
161 {
162  std::unique_lock<std::mutex> lk(state_mutex_);
163  if (requestThread_.joinable()) requestThread_.join();
164  should_stop_ = false;
165  request_stop_requested_ = false;
166 
167  if (request_socket_ == -1)
168  {
169  TLOG(TLVL_INFO) << "Connecting Request Reception socket";
170  setupRequestListener();
171  }
172 
173  TLOG(TLVL_INFO) << "Starting Request Reception Thread";
174  try
175  {
176  requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
177  }
178  catch (const boost::exception& e)
179  {
180  TLOG(TLVL_ERROR) << "Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
181  std::cerr << "Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
182  exit(5);
183  }
184  running_ = true;
185 }
186 
188 {
189  while (!should_stop_)
190  {
191  TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests";
192 
193  if (request_socket_ == -1)
194  {
195  setupRequestListener();
196  }
197 
198  int ms_to_wait = 10;
199  struct pollfd ufds[1];
200  ufds[0].fd = request_socket_;
201  ufds[0].events = POLLIN | POLLPRI | POLLERR;
202  int rv = poll(ufds, 1, ms_to_wait);
203 
204  // Continue loop if no message received or message does not have correct event ID
205  if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
206  {
207  if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
208  {
209  close(request_socket_);
210  request_socket_ = -1;
211  }
212  if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
213  {
214  break;
215  }
216  continue;
217  }
218 
219  TLOG(11) << "Received packet on Request channel";
220  std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
221  struct sockaddr_in from;
222  socklen_t len = sizeof(from);
223  auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, (struct sockaddr*)&from, &len);
224  if (sts < 0)
225  {
226  TLOG(TLVL_ERROR) << "Error receiving request message header err=" << strerror(errno);
227  close(request_socket_);
228  request_socket_ = -1;
229  continue;
230  }
231 
232  auto hdr_buffer = reinterpret_cast<artdaq::detail::RequestHeader*>(&buffer[0]);
233  TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer->header << std::dec << ", packet_count: " << hdr_buffer->packet_count << " from rank " << hdr_buffer->rank << ", " << inet_ntoa(from.sin_addr) << ":" << from.sin_port << ", run number: " << hdr_buffer->run_number;
234  if (!hdr_buffer->isValid()) continue;
235 
236  request_received_ = true;
237 
238  // 19-Dec-2018, KAB: added check on current run number
239  if (run_number_ != 0 && hdr_buffer->run_number != run_number_)
240  {
241  TLOG(TLVL_WARNING) << "Received a Request Message with the wrong run number ("
242  << hdr_buffer->run_number << "), expected " << run_number_
243  << ", ignoring this request.";
244  continue;
245  }
246 
247  if (hdr_buffer->mode == artdaq::detail::RequestMessageMode::EndOfRun)
248  {
249  TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
250  request_stop_timeout_ = std::chrono::steady_clock::now();
251  request_stop_requested_ = true;
252  }
253 
254  std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
255  memcpy(&pkt_buffer[0], &buffer[sizeof(artdaq::detail::RequestHeader)], sizeof(artdaq::detail::RequestPacket) * hdr_buffer->packet_count);
256  bool anyNew = false;
257 
258  if (should_stop_) break;
259 
260  for (auto& buffer : pkt_buffer)
261  {
262  TLOG(20) << "Request Packet: hdr=" << /*std::dec <<*/ buffer.header << ", seq=" << buffer.sequence_id << ", ts=" << buffer.timestamp;
263  if (!buffer.isValid()) continue;
264  std::unique_lock<std::mutex> tlk(request_mutex_);
265  if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
266  {
267  TLOG(TLVL_ERROR) << "Received conflicting request for SeqID "
268  << buffer.sequence_id << "!"
269  << " Old ts=" << requests_[buffer.sequence_id]
270  << ", new ts=" << buffer.timestamp << ". Keeping OLD!";
271  }
272  else if (!requests_.count(buffer.sequence_id))
273  {
274  int delta = buffer.sequence_id - highest_seen_request_;
275  TLOG(11) << "Received request for sequence ID " << buffer.sequence_id
276  << " and timestamp " << buffer.timestamp << " (delta: " << delta << ")";
277  if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
278  {
279  TLOG(11) << "Already serviced this request ( sequence ID " << buffer.sequence_id << ")! Ignoring...";
280  }
281  else
282  {
283  requests_[buffer.sequence_id] = buffer.timestamp;
284  request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
285  anyNew = true;
286  }
287  }
288  }
289  if (anyNew)
290  {
291  request_cv_.notify_all();
292  }
293  }
294  TLOG(TLVL_DEBUG) << "Ending Request Thread";
295  running_ = false;
296 }
297 
298 void artdaq::RequestReceiver::RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
299 {
300  TLOG(10) << "RemoveRequest: Removing request for id " << reqID;
301  std::unique_lock<std::mutex> lk(request_mutex_);
302  requests_.erase(reqID);
303 
304  if (reqID > highest_seen_request_)
305  {
306  TLOG(10) << "RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() << ", reqID=" << reqID << ", expected=" << highest_seen_request_ + request_increment_;
307  if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
308  {
309  out_of_order_requests_.insert(reqID);
310 
311  auto it = out_of_order_requests_.begin();
312  while (it != out_of_order_requests_.end() && !should_stop_) // Stop accounting for requests after stop
313  {
314  if (*it == highest_seen_request_ + request_increment_)
315  {
316  highest_seen_request_ = *it;
317  it = out_of_order_requests_.erase(it);
318  }
319  else
320  {
321  break;
322  }
323  }
324  }
325  else // no out-of-order requests and this request is highest seen + request_increment_
326  {
327  highest_seen_request_ = reqID;
328  }
329  TLOG(10) << "RemoveRequest: reqID=" << reqID << " Setting highest_seen_request_ to " << highest_seen_request_;
330  }
331  if (metricMan && request_timing_.count(reqID))
332  {
333  metricMan->sendMetric("Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]), "seconds", 2, MetricMode::Average);
334  }
335  request_timing_.erase(reqID);
336 }
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:33
void startRequestReception()
Enables (starts) the reception of data requests.
End of Run mode (Used to end request processing on receiver)
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
Definition: TCPConnect.cc:217
Header of a RequestMessage. Contains magic bytes for validation and a count of expected RequestPacket...
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void stopRequestReception(bool force=false)
Disables (stops) the reception of data requests.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map