artdaq  v3_02_01
RequestReceiver.cc
1 #define TRACE_NAME "RequestReceiver"
2 
3 #include "artdaq/DAQrate/RequestReceiver.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/detail/RequestMessage.hh"
6 
7 #include <boost/exception/all.hpp>
8 #include <boost/throw_exception.hpp>
9 
10 #include <limits>
11 #include <iterator>
12 
13 #include "canvas/Utilities/Exception.h"
14 #include "cetlib_except/exception.h"
15 #include "fhiclcpp/ParameterSet.h"
16 
17 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
18 #include "artdaq-core/Data/Fragment.hh"
19 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/TimeUtils.hh"
22 
23 #include <fstream>
24 #include <iomanip>
25 #include <iterator>
26 #include <iostream>
27 #include <iomanip>
28 #include <algorithm>
29 #include <sys/poll.h>
31 
33  : request_port_(3001)
34  , request_addr_("227.128.12.26")
35  , running_(false)
36  , requests_()
37  , request_timing_()
38  , request_stop_requested_(false)
39  , request_received_(false)
40  , end_of_run_timeout_ms_(1000)
41  , should_stop_(false)
42  , highest_seen_request_(0)
43  , out_of_order_requests_()
44  , request_increment_(1)
45 {}
46 
47 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps)
48  : request_port_(ps.get<int>("request_port", 3001))
49  , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
50  , multicast_out_addr_(ps.get<std::string>("multicast_interface_ip", "0.0.0.0"))
51  , running_(false)
52  , requests_()
53  , request_timing_()
54  , request_stop_requested_(false)
55  , request_received_(false)
56  , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
57  , should_stop_(false)
58  , highest_seen_request_(0)
59  , out_of_order_requests_()
60  , request_increment_(ps.get<artdaq::Fragment::sequence_id_t>("request_increment", 1))
61 {
63 }
64 
66 {
67  TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_;
68  request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
69  if (request_socket_ < 0)
70  {
71  TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno);
72  exit(1);
73  }
74 
75  struct sockaddr_in si_me_request;
76 
77  int yes = 1;
78  if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
79  {
80  TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno);
81  exit(1);
82  }
83  memset(&si_me_request, 0, sizeof(si_me_request));
84  si_me_request.sin_family = AF_INET;
85  si_me_request.sin_port = htons(request_port_);
86  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
87  if (bind(request_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
88  {
89  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno);
90  exit(1);
91  }
92 
93  if (request_addr_ != "localhost")
94  {
95  struct ip_mreq mreq;
96  int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
97  if (sts == -1)
98  {
99  TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno);
100  exit(1);
101  }
102  sts = GetInterfaceForNetwork(multicast_out_addr_.c_str(), mreq.imr_interface);
103  if (sts == -1)
104  {
105  TLOG(TLVL_ERROR) << "Unable to resolve hostname for " << multicast_out_addr_;
106  exit(1);
107  }
108  if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
109  {
110  TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno);
111  exit(1);
112  }
113  }
114  TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank;
115 }
116 
117 artdaq::RequestReceiver::~RequestReceiver()
118 {
119  stopRequestReceiverThread(true);
120 }
121 
123 {
124  std::unique_lock<std::mutex> lk(state_mutex_);
125  if (!request_received_ && !force)
126  {
127  TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
128  << "Check that UDP port " << request_port_ << " is open in the firewall config.";
129  }
130  should_stop_ = true;
131  TLOG(TLVL_DEBUG) << "Joining requestThread";
132  if (requestThread_.joinable()) requestThread_.join();
133  bool once = true;
134  while (running_) {
135  if (once) TLOG(TLVL_ERROR) << "running_ is true after thread join! Should NOT happen";
136  once = false;
137  usleep(10000);
138  }
139 
140  if (request_socket_ != -1)
141  {
142  close(request_socket_);
143  request_socket_ = -1;
144  }
145  request_received_ = false;
146  highest_seen_request_ = 0;
147 }
148 
150 {
151  std::unique_lock<std::mutex> lk(state_mutex_);
152  if (requestThread_.joinable()) requestThread_.join();
153  should_stop_ = false;
154  request_stop_requested_ = false;
155 
156  if (request_socket_ == -1)
157  {
158  TLOG(TLVL_INFO) << "Connecting Request Reception socket";
159  setupRequestListener();
160  }
161 
162  TLOG(TLVL_INFO) << "Starting Request Reception Thread";
163  requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
164  running_ = true;
165 }
166 
168 {
169  while (!should_stop_)
170  {
171  TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests";
172 
173  if (request_socket_ == -1)
174  {
175  setupRequestListener();
176  }
177 
178  int ms_to_wait = 10;
179  struct pollfd ufds[1];
180  ufds[0].fd = request_socket_;
181  ufds[0].events = POLLIN | POLLPRI | POLLERR;
182  int rv = poll(ufds, 1, ms_to_wait);
183 
184  // Continue loop if no message received or message does not have correct event ID
185  if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
186  {
187  if (rv == 1 && (ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)))
188  {
189  close(request_socket_);
190  request_socket_ = -1;
191  }
192  if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
193  {
194  break;
195  }
196  continue;
197  }
198 
199  TLOG(11) << "Recieved packet on Request channel";
201  auto sts = recv(request_socket_, &hdr_buffer, sizeof(hdr_buffer), 0);
202  if (sts < 0)
203  {
204  TLOG(TLVL_ERROR) << "Error receiving request message header err=" << strerror(errno);
205  close(request_socket_);
206  request_socket_ = -1;
207  continue;
208  }
209  TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer.header;
210  if (!hdr_buffer.isValid()) continue;
211 
212  request_received_ = true;
214  {
215  TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
216  request_stop_timeout_ = std::chrono::steady_clock::now();
217  request_stop_requested_ = true;
218  }
219 
220  std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer.packet_count);
221  size_t recvd = 0;
222  while (recvd < sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count)
223  {
224  ssize_t this_recv = recv(request_socket_, reinterpret_cast<uint8_t*>(&pkt_buffer[0]) + recvd, sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count - recvd, 0);
225  if (this_recv < 0)
226  {
227  TLOG(TLVL_ERROR) << "Error receiving request message data err=" << strerror(errno);
228  close(request_socket_);
229  request_socket_ = -1;
230  continue;
231 
232  }
233  recvd += this_recv;
234  }
235  bool anyNew = false;
236 
237  if (should_stop_) break;
238 
239  for (auto& buffer : pkt_buffer)
240  {
241  TLOG(20) << "Request Packet: hdr=" << buffer.header << ", seq=" << buffer.sequence_id << ", ts=" << buffer.timestamp;
242  if (!buffer.isValid()) continue;
243  if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
244  {
245  TLOG(TLVL_ERROR) << "Received conflicting request for SeqID "
246  << buffer.sequence_id << "!"
247  << " Old ts=" << requests_[buffer.sequence_id]
248  << ", new ts=" << buffer.timestamp << ". Keeping OLD!";
249  }
250  else if (!requests_.count(buffer.sequence_id))
251  {
252  int delta = buffer.sequence_id - highest_seen_request_;
253  TLOG(11) << "Recieved request for sequence ID " << buffer.sequence_id
254  << " and timestamp " << buffer.timestamp << " (delta: " << delta << ")";
255  if (delta <= 0 || out_of_order_requests_.count(buffer.sequence_id))
256  {
257  TLOG(11) << "Already serviced this request! Ignoring...";
258  }
259  else
260  {
261  std::unique_lock<std::mutex> tlk(request_mutex_);
262  requests_[buffer.sequence_id] = buffer.timestamp;
263  request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
264  anyNew = true;
265  }
266  }
267  }
268  if (anyNew)
269  {
270  request_cv_.notify_all();
271  }
272  }
273  TLOG(TLVL_DEBUG) << "Ending Request Thread";
274  running_ = false;
275 }
276 
277 void artdaq::RequestReceiver::RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
278 {
279  TLOG(10) << "RemoveRequest: Removing request with id " << reqID;
280  std::unique_lock<std::mutex> lk(request_mutex_);
281  requests_.erase(reqID);
282 
283  if (reqID > highest_seen_request_)
284  {
285  TLOG(10) << "RemoveRequest: out_of_order_requests_.size() == " << out_of_order_requests_.size() << ", reqID=" << reqID << ", expected=" << highest_seen_request_ + request_increment_;
286  if (out_of_order_requests_.size() || reqID != highest_seen_request_ + request_increment_)
287  {
288  out_of_order_requests_.insert(reqID);
289 
290  auto it = out_of_order_requests_.begin();
291  while (it != out_of_order_requests_.end() && !should_stop_) // Stop accounting for requests after stop
292  {
293  if (*it == highest_seen_request_ + request_increment_)
294  {
295  highest_seen_request_ = *it;
296  it = out_of_order_requests_.erase(it);
297  }
298  else
299  {
300  break;
301  }
302  }
303  }
304  else // no out-of-order requests and this request is highest seen + request_increment_
305  {
306  highest_seen_request_ = reqID;
307  }
308  TLOG(10) << "RemoveRequest: reqID=" << reqID << " Setting highest_seen_request_ to " << highest_seen_request_;
309  }
310  if (metricMan)
311  {
312  metricMan->sendMetric("Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]), "seconds", 2, MetricMode::Average);
313  }
314  request_timing_.erase(reqID);
315 }
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:33
End of Run mode (Used to end request processing on receiver)
RequestMessageMode mode
Communicates additional information to the Request receiver.
void startRequestReceiverThread()
Function that launches the data request receiver thread (receiveRequestsLoop())
bool isValid() const
Check the magic bytes of the packet.
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
Definition: TCPConnect.cc:216
Header of a RequestMessage. Contains magic bytes for validation and a count of expected RequestPacket...
void stopRequestReceiverThread(bool force=false)
Stop the data request receiver thread (receiveRequestsLoop)
uint32_t packet_count
The number of RequestPackets in this Request message.
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
Remove the request with the given sequence ID from the request map