artdaq  v3_07_02
RequestReceiver.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
3 
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/RequestReceiver.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
7 
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
10 
11 #include <iterator>
12 #include <limits>
13 
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
17 
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
23 
24 #include <arpa/inet.h>
25 #include <netinet/in.h>
26 #include <sys/poll.h>
27 #include <algorithm>
28 #include <fstream>
29 #include <iomanip>
30 #include <iostream>
31 #include <iterator>
33 
35  : request_port_(3001)
36  , request_addr_("227.128.12.26")
37  , running_(false)
38  , run_number_(0)
39  , request_socket_(-1)
40  , requests_()
41  , request_timing_()
42  , request_stop_requested_(false)
43  , request_received_(false)
44  , end_of_run_timeout_ms_(1000)
45  , should_stop_(false)
46  , highest_seen_request_(0)
47  , last_next_request_(0)
48  , out_of_order_requests_()
49  , request_increment_(1)
50 {}
51 
52 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps)
53  : request_port_(ps.get<int>("request_port", 3001))
54  , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
55  , multicast_in_addr_(ps.get<std::string>("multicast_interface_ip", "0.0.0.0"))
56  , running_(false)
57  , run_number_(0)
58  , request_socket_(-1)
59  , requests_()
60  , request_timing_()
61  , request_stop_requested_(false)
62  , request_received_(false)
63  , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
64  , should_stop_(false)
65  , highest_seen_request_(0)
66  , last_next_request_(0)
67  , out_of_order_requests_()
68  , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>("request_increment", 1))
69 {
71 }
72 
74 {
75  TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_
76  << ", multicast interface=" << multicast_in_addr_;
77  request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
78  if (request_socket_ < 0)
79  {
80  TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno);
81  exit(1);
82  }
83 
84  struct sockaddr_in si_me_request;
85 
86  int yes = 1;
87  if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
88  {
89  TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno);
90  exit(1);
91  }
92  memset(&si_me_request, 0, sizeof(si_me_request));
93  si_me_request.sin_family = AF_INET;
94  si_me_request.sin_port = htons(request_port_);
95  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
96  if (bind(request_socket_, (struct sockaddr*)&si_me_request, sizeof(si_me_request)) == -1)
97  {
98  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno);
99  exit(1);
100  }
101 
102  if (request_addr_ != "localhost")
103  {
104  struct ip_mreq mreq;
105  int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
106  if (sts == -1)
107  {
108  TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno);
109  exit(1);
110  }
111  sts = GetInterfaceForNetwork(multicast_in_addr_.c_str(), mreq.imr_interface);
112  if (sts == -1)
113  {
114  TLOG(TLVL_ERROR) << "Unable to determine the multicast network interface for " << multicast_in_addr_;
115  exit(1);
116  }
117  char addr_str[INET_ADDRSTRLEN];
118  inet_ntop(AF_INET, &(mreq.imr_interface), addr_str, INET_ADDRSTRLEN);
119  TLOG(TLVL_INFO) << "Successfully determined the multicast network interface for " << multicast_in_addr_ << ": " << addr_str << " (RequestReceiver)";
120  if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
121  {
122  TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno);
123  exit(1);
124  }
125  }
126  TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank;
127 }
128 
129 artdaq::RequestReceiver::~RequestReceiver()
130 {
131  stopRequestReception(true);
132 }
133 
135 {
136  std::unique_lock<std::mutex> lk(state_mutex_);
137  if (!request_received_ && !force)
138  {
139  TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
140  << "Check that UDP port " << request_port_ << " is open in the firewall config.";
141  }
142  should_stop_ = true;
143  if (running_)
144  {
145  TLOG(TLVL_DEBUG) << "Joining requestThread";
146  if (requestThread_.joinable()) requestThread_.join();
147  bool once = true;
148  while (running_)
149  {
150  if (once) TLOG(TLVL_ERROR) << "running_ is true after thread join! Should NOT happen";
151  once = false;
152  usleep(10000);
153  }
154  }
155 
156  if (request_socket_ != -1)
157  {
158  close(request_socket_);
159  request_socket_ = -1;
160  }
161  request_received_ = false;
162  highest_seen_request_ = 0;
163  last_next_request_ = 0;
164 }
165 
167 {
168  std::unique_lock<std::mutex> lk(state_mutex_);
169  if (requestThread_.joinable()) requestThread_.join();
170  should_stop_ = false;
171  request_stop_requested_ = false;
172 
173  if (request_socket_ == -1)
174  {
175  TLOG(TLVL_INFO) << "Connecting Request Reception socket";
176  setupRequestListener();
177  }
178 
179  TLOG(TLVL_INFO) << "Starting Request Reception Thread";
180  try
181  {
182  requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
183  }
184  catch (const boost::exception& e)
185  {
186  TLOG(TLVL_ERROR) << "Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
187  std::cerr << "Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
188  exit(5);
189  }
190  running_ = true;
191 }
192 
194 {
195  while (!should_stop_)
196  {
197  TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests";
198 
199  if (request_socket_ == -1)
200  {
201  setupRequestListener();
202  }
203 
204  int ms_to_wait = 10;
205  struct pollfd ufds[1];
206  ufds[0].fd = request_socket_;
207  ufds[0].events = POLLIN | POLLPRI | POLLERR;
208  int rv = poll(ufds, 1, ms_to_wait);
209 
210  // Continue loop if no message received or message does not have correct event ID
211  if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
212  {
213  if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
214  {
215  close(request_socket_);
216  request_socket_ = -1;
217  }
218  if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
219  {
220  break;
221  }
222  continue;
223  }
224 
225  TLOG(11) << "Received packet on Request channel";
226  std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
227  struct sockaddr_in from;
228  socklen_t len = sizeof(from);
229  auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, (struct sockaddr*)&from, &len);
230  if (sts < 0)
231  {
232  TLOG(TLVL_ERROR) << "Error receiving request message header err=" << strerror(errno);
233  close(request_socket_);
234  request_socket_ = -1;
235  continue;
236  }
237 
238  auto hdr_buffer = reinterpret_cast<artdaq::detail::RequestHeader*>(&buffer[0]);
239  TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer->header << std::dec << ", packet_count: " << hdr_buffer->packet_count << " from rank " << hdr_buffer->rank << ", " << inet_ntoa(from.sin_addr) << ":" << from.sin_port << ", run number: " << hdr_buffer->run_number;
240  if (!hdr_buffer->isValid()) continue;
241 
242  request_received_ = true;
243 
244  // 19-Dec-2018, KAB: added check on current run number
245  if (run_number_ != 0 && hdr_buffer->run_number != run_number_)
246  {
247  TLOG(TLVL_WARNING) << "Received a Request Message with the wrong run number ("
248  << hdr_buffer->run_number << "), expected " << run_number_
249  << ", ignoring this request.";
250  continue;
251  }
252 
253  if (hdr_buffer->mode == artdaq::detail::RequestMessageMode::EndOfRun)
254  {
255  TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
256  request_stop_timeout_ = std::chrono::steady_clock::now();
257  request_stop_requested_ = true;
258  }
259 
260  std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
261  memcpy(&pkt_buffer[0], &buffer[sizeof(artdaq::detail::RequestHeader)], sizeof(artdaq::detail::RequestPacket) * hdr_buffer->packet_count);
262  bool anyNew = false;
263 
264  if (should_stop_) break;
265 
266  for (auto& buffer : pkt_buffer)
267  {
268  TLOG(20) << "Request Packet: hdr=" << /*std::dec <<*/ buffer.header << ", seq=" << buffer.sequence_id << ", ts=" << buffer.timestamp;
269  if (!buffer.isValid()) continue;
270  std::unique_lock<std::mutex> tlk(request_mutex_);
271  if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
272  {
273  TLOG(TLVL_ERROR) << "Received conflicting request for SeqID "
274  << buffer.sequence_id << "!"
275  << " Old ts=" << requests_[buffer.sequence_id]
276  << ", new ts=" << buffer.timestamp << ". Keeping OLD!";
277  }
278  else if (!requests_.count(buffer.sequence_id))
279  {
280  int delta = buffer.sequence_id - highest_seen_request_;
281  TLOG(11) << "Received request for sequence ID " << buffer.sequence_id
282  << " and timestamp " << buffer.timestamp << " (delta: " << delta << ")";
283  if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
284  {
285  TLOG(11) << "Already serviced this request ( sequence ID " << buffer.sequence_id << ")! Ignoring...";
286  }
287  else
288  {
289  requests_[buffer.sequence_id] = buffer.timestamp;
290  request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
291  anyNew = true;
292  }
293  }
294  }
295  if (anyNew)
296  {
297  request_cv_.notify_all();
298  }
299  }
300  TLOG(TLVL_DEBUG) << "Ending Request Thread";
301  running_ = false;
302 }
303 
304 std::pair<artdaq::Fragment::sequence_id_t, artdaq::Fragment::timestamp_t> artdaq::RequestReceiver::GetNextRequest()
305 {
306  std::unique_lock<std::mutex> lk(request_mutex_);
307 
308  auto it = requests_.begin();
309  while (it != requests_.end() && it->first <= last_next_request_) { ++it; }
310 
311  if (it == requests_.end())
312  {
313  return std::make_pair<artdaq::Fragment::sequence_id_t, artdaq::Fragment::timestamp_t>(0, 0);
314  }
315 
316  last_next_request_ = it->first;
317  return *it;
318 }
319 
320 void artdaq::RequestReceiver::RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
321 {
322  TLOG(10) << "RemoveRequest: Removing request for id " << reqID;
323  std::unique_lock<std::mutex> lk(request_mutex_);
324  requests_.erase(reqID);
325 
326  if (reqID > highest_seen_request_)
327  {
328  TLOG(10) << "RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() << ", reqID=" << reqID << ", expected=" << highest_seen_request_ + request_increment_;
329  if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
330  {
331  out_of_order_requests_.insert(reqID);
332 
333  auto it = out_of_order_requests_.begin();
334  while (it != out_of_order_requests_.end() && !should_stop_) // Stop accounting for requests after stop
335  {
336  if (*it == highest_seen_request_ + request_increment_)
337  {
338  highest_seen_request_ = *it;
339  it = out_of_order_requests_.erase(it);
340  }
341  else
342  {
343  break;
344  }
345  }
346  }
347  else // no out-of-order requests and this request is highest seen + request_increment_
348  {
349  highest_seen_request_ = reqID;
350  }
351  TLOG(10) << "RemoveRequest: reqID=" << reqID << " Setting highest_seen_request_ to " << highest_seen_request_;
352  }
353  if (metricMan && request_timing_.count(reqID))
354  {
355  metricMan->sendMetric("Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]), "seconds", 2, MetricMode::Average);
356  }
357  request_timing_.erase(reqID);
358 }
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:33
void startRequestReception()
Enables (starts) the reception of data requests.
End of Run mode (Used to end request processing on receiver)
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
Definition: TCPConnect.cc:217
Header of a RequestMessage. Contains magic bytes for validation and a count of expected RequestPacket...
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
std::pair< artdaq::Fragment::sequence_id_t, artdaq::Fragment::timestamp_t > GetNextRequest()
Get the "next" request, i.e. the first unsatisfied request that has not already been returned by GetNextRequest...
void stopRequestReception(bool force=false)
Disables (stops) the reception of data requests.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map