artdaq  v3_09_00
RequestReceiver.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_RequestReceiver").c_str()
3 
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/RequestReceiver.hh"
6 #include "artdaq/DAQrate/detail/RequestMessage.hh"
7 
8 #include <boost/exception/all.hpp>
9 #include <boost/throw_exception.hpp>
10 
11 #include <iterator>
12 #include <limits>
13 
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib_except/exception.h"
16 #include "fhiclcpp/ParameterSet.h"
17 
18 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
19 #include "artdaq-core/Data/Fragment.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
22 #include "artdaq-core/Utilities/TimeUtils.hh"
23 
24 #include <arpa/inet.h>
25 #include <netinet/in.h>
26 #include <sys/poll.h>
27 #include <algorithm>
28 #include <fstream>
29 #include <iomanip>
30 #include <iostream>
31 #include <iterator>
33 
35  : request_stop_requested_(false)
36  , request_received_(false)
37  , should_stop_(false)
38  , request_addr_("227.128.12.26")
39  , receive_requests_(false)
40 {}
41 
// RequestReceiver constructor taking its configuration from a FHiCL ParameterSet.
//
// ps:            parameter set read for the keys listed on the initializers below
//                (each has a fallback value used when the key is absent).
// output_buffer: shared RequestBuffer stored in requests_; receiveRequestsLoop()
//                pushes every valid request into it.
42 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps, std::shared_ptr<RequestBuffer> output_buffer)
43  : request_stop_requested_(false)
44  , request_received_(false)
45  , should_stop_(false)
46  , request_port_(ps.get<int>("request_port", 3001)) // UDP port the request socket is bound to
47  , request_addr_(ps.get<std::string>("request_address", "227.128.12.26")) // multicast group joined unless set to "localhost"
48  , multicast_in_addr_(ps.get<std::string>("multicast_interface_ip", "0.0.0.0")) // used to pick the interface for the multicast membership
49  , receive_requests_(ps.get<bool>("receive_requests", false)) // when false, start/stopRequestReception() return immediately
50  , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000)) // quiet time after an EndOfRun request before the receive loop exits
51  , requests_(output_buffer)
52 {
53  if (receive_requests_)
54  {
// NOTE(review): the listing numbering jumps from 54 to 56 here, so the statement
// inside this branch appears to be missing from this view - confirm against the
// repository before relying on this constructor's side effects.
56  }
57 }
58 
60 {
61  TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_
62  << ", multicast interface=" << multicast_in_addr_;
63  request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
64  if (request_socket_ < 0)
65  {
66  TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno);
67  exit(1);
68  }
69 
70  struct sockaddr_in si_me_request;
71 
72  int yes = 1;
73  if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
74  {
75  TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno);
76  exit(1);
77  }
78  memset(&si_me_request, 0, sizeof(si_me_request));
79  si_me_request.sin_family = AF_INET;
80  si_me_request.sin_port = htons(request_port_);
81  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
82  if (bind(request_socket_, reinterpret_cast<struct sockaddr*>(&si_me_request), sizeof(si_me_request)) == -1) // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
83  {
84  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno);
85  exit(1);
86  }
87 
88  if (request_addr_ != "localhost")
89  {
90  struct ip_mreq mreq;
91  int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
92  if (sts == -1)
93  {
94  TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno);
95  exit(1);
96  }
97  sts = GetInterfaceForNetwork(multicast_in_addr_.c_str(), mreq.imr_interface);
98  if (sts == -1)
99  {
100  TLOG(TLVL_ERROR) << "Unable to determine the multicast network interface for " << multicast_in_addr_;
101  exit(1);
102  }
103  char addr_str[INET_ADDRSTRLEN];
104  inet_ntop(AF_INET, &(mreq.imr_interface), addr_str, INET_ADDRSTRLEN);
105  TLOG(TLVL_INFO) << "Successfully determined the multicast network interface for " << multicast_in_addr_ << ": " << addr_str << " (RequestReceiver)";
106  if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
107  {
108  TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno);
109  exit(1);
110  }
111  }
112  TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank;
113 }
114 
116 {
117  stopRequestReception(true);
118 }
119 
121 {
122  std::unique_lock<std::mutex> lk(state_mutex_);
123  if (!receive_requests_) return;
124  if (!request_received_ && !force)
125  {
126  TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
127  << "Check that UDP port " << request_port_ << " is open in the firewall config.";
128  }
129  should_stop_ = true;
130  if (running_)
131  {
132  TLOG(TLVL_DEBUG) << "Joining requestThread";
133  try
134  {
135  if (requestThread_.joinable())
136  {
137  requestThread_.join();
138  }
139  }
140  catch (...)
141  {
142  // IGNORED
143  }
144  bool once = true;
145  while (running_)
146  {
147  if (once)
148  {
149  TLOG(TLVL_ERROR) << "running_ is true after thread join! Should NOT happen";
150  }
151  once = false;
152  usleep(10000);
153  }
154  }
155 
156  if (request_socket_ != -1)
157  {
158  close(request_socket_);
159  request_socket_ = -1;
160  }
161  request_received_ = false;
162 }
163 
165 {
166  if (!receive_requests_) return;
167  std::unique_lock<std::mutex> lk(state_mutex_);
168  if (requestThread_.joinable())
169  {
170  requestThread_.join();
171  }
172  should_stop_ = false;
173  request_stop_requested_ = false;
174 
175  if (request_socket_ == -1)
176  {
177  TLOG(TLVL_INFO) << "Connecting Request Reception socket";
178  setupRequestListener();
179  }
180 
181  TLOG(TLVL_INFO) << "Starting Request Reception Thread";
182  try
183  {
184  requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
185  }
186  catch (const boost::exception& e)
187  {
188  TLOG(TLVL_ERROR) << "Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
189  std::cerr << "Caught boost::exception starting Request Receiver thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
190  exit(5);
191  }
192 }
193 
195 {
196  running_ = true;
197  requests_->reset();
198  requests_->setRunning(true);
199  while (!should_stop_)
200  {
201  TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests";
202 
203  if (request_socket_ == -1)
204  {
205  setupRequestListener();
206  }
207 
208  int ms_to_wait = 10;
209  struct pollfd ufds[1];
210  ufds[0].fd = request_socket_;
211  ufds[0].events = POLLIN | POLLPRI | POLLERR;
212  int rv = poll(ufds, 1, ms_to_wait);
213 
214  // Continue loop if no message received or message does not have correct event ID
215  if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
216  {
217  if (rv == 1 && ((ufds[0].revents & (POLLNVAL | POLLERR | POLLHUP)) != 0))
218  {
219  close(request_socket_);
220  request_socket_ = -1;
221  }
222  if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
223  {
224  break;
225  }
226  continue;
227  }
228 
229  TLOG(11) << "Received packet on Request channel";
230  std::vector<uint8_t> buffer(MAX_REQUEST_MESSAGE_SIZE);
231  struct sockaddr_in from;
232  socklen_t len = sizeof(from);
233  auto sts = recvfrom(request_socket_, &buffer[0], MAX_REQUEST_MESSAGE_SIZE, 0, reinterpret_cast<struct sockaddr*>(&from), &len); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
234  if (sts < 0)
235  {
236  TLOG(TLVL_ERROR) << "Error receiving request message header err=" << strerror(errno);
237  close(request_socket_);
238  request_socket_ = -1;
239  continue;
240  }
241 
242  auto hdr_buffer = reinterpret_cast<artdaq::detail::RequestHeader*>(&buffer[0]); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
243  TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer->header << std::dec << ", packet_count: " << hdr_buffer->packet_count << " from rank " << hdr_buffer->rank << ", " << inet_ntoa(from.sin_addr) << ":" << from.sin_port << ", run number: " << hdr_buffer->run_number;
244  if (!hdr_buffer->isValid())
245  {
246  continue;
247  }
248 
249  request_received_ = true;
250 
251  // 19-Dec-2018, KAB: added check on current run number
252  if (run_number_ != 0 && hdr_buffer->run_number != run_number_)
253  {
254  TLOG(TLVL_WARNING) << "Received a Request Message with the wrong run number ("
255  << hdr_buffer->run_number << "), expected " << run_number_
256  << ", ignoring this request.";
257  continue;
258  }
259 
260  if (hdr_buffer->mode == artdaq::detail::RequestMessageMode::EndOfRun)
261  {
262  TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests...";
263  request_stop_timeout_ = std::chrono::steady_clock::now();
264  request_stop_requested_ = true;
265  }
266 
267  std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer->packet_count);
268  memcpy(&pkt_buffer[0], &buffer[sizeof(artdaq::detail::RequestHeader)], sizeof(artdaq::detail::RequestPacket) * hdr_buffer->packet_count);
269 
270  if (should_stop_)
271  {
272  break;
273  }
274 
275  for (auto& buffer : pkt_buffer)
276  {
277  TLOG(20) << "Request Packet: hdr=" << /*std::dec <<*/ buffer.header << ", seq=" << buffer.sequence_id << ", ts=" << buffer.timestamp;
278  if (!buffer.isValid()) continue;
279  requests_->push(buffer.sequence_id, buffer.timestamp);
280  }
281  }
282  TLOG(TLVL_DEBUG) << "Ending Request Thread";
283  running_ = false;
284  requests_->setRunning(false);
285 }
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:33
void startRequestReception()
Enables (starts) the reception of data requests.
End of Run mode (Used to end request processing on receiver)
RequestReceiver()
RequestReceiver Default Constructor.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
Definition: TCPConnect.cc:223
Header of a RequestMessage. Contains magic bytes for validation and a count of expected RequestPacket...
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
virtual ~RequestReceiver()
RequestReceiver Destructor.
void stopRequestReception(bool force=false)
Disables (stops) the reception of data requests.