artdaq  v3_02_00
RequestReceiver.cc
1 #define TRACE_NAME "RequestReceiver"
2 
3 #include "artdaq/DAQrate/RequestReceiver.hh"
4 #include "artdaq/DAQdata/Globals.hh"
5 #include "artdaq/DAQrate/detail/RequestMessage.hh"
6 
7 #include <boost/exception/all.hpp>
8 #include <boost/throw_exception.hpp>
9 
10 #include <limits>
11 #include <iterator>
12 
13 #include "canvas/Utilities/Exception.h"
14 #include "cetlib_except/exception.h"
15 #include "fhiclcpp/ParameterSet.h"
16 
17 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
18 #include "artdaq-core/Data/Fragment.hh"
19 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
20 #include "artdaq-core/Utilities/ExceptionHandler.hh"
21 #include "artdaq-core/Utilities/TimeUtils.hh"
22 
23 #include <fstream>
24 #include <iomanip>
25 #include <iterator>
26 #include <iostream>
27 #include <iomanip>
28 #include <algorithm>
29 #include <sys/poll.h>
31 
32 artdaq::RequestReceiver::RequestReceiver()
33  : request_port_(3001)
34  , request_addr_("227.128.12.26")
35  , running_(false)
36  , requests_()
37  , request_timing_()
38  , request_stop_requested_(false)
39  , request_received_(false)
40  , end_of_run_timeout_ms_(1000)
41  , should_stop_(false)
42  , highest_seen_request_(0)
43 {}
44 
45 artdaq::RequestReceiver::RequestReceiver(const fhicl::ParameterSet& ps)
46  : request_port_(ps.get<int>("request_port", 3001))
47  , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
48  , multicast_out_addr_(ps.get<std::string>("multicast_interface_ip", "0.0.0.0"))
49  , running_(false)
50  , requests_()
51  , request_timing_()
52  , request_stop_requested_(false)
53  , request_received_(false)
54  , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
55  , should_stop_(false)
56  , highest_seen_request_(0)
57 {
58  setupRequestListener();
59 }
60 
62 {
63  TLOG(TLVL_INFO) << "Setting up request listen socket, rank=" << my_rank << ", address=" << request_addr_ << ":" << request_port_ ;
64  request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
65  if (request_socket_ < 0)
66  {
67  TLOG(TLVL_ERROR) << "Error creating socket for receiving data requests! err=" << strerror(errno) ;
68  exit(1);
69  }
70 
71  struct sockaddr_in si_me_request;
72 
73  int yes = 1;
74  if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
75  {
76  TLOG(TLVL_ERROR) << "Unable to enable port reuse on request socket, err=" << strerror(errno) ;
77  exit(1);
78  }
79  memset(&si_me_request, 0, sizeof(si_me_request));
80  si_me_request.sin_family = AF_INET;
81  si_me_request.sin_port = htons(request_port_);
82  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
83  if (bind(request_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
84  {
85  TLOG(TLVL_ERROR) << "Cannot bind request socket to port " << request_port_ << ", err=" << strerror(errno) ;
86  exit(1);
87  }
88 
89  if (request_addr_ != "localhost")
90  {
91  struct ip_mreq mreq;
92  int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
93  if (sts == -1)
94  {
95  TLOG(TLVL_ERROR) << "Unable to resolve multicast request address, err=" << strerror(errno) ;
96  exit(1);
97  }
98  sts = GetInterfaceForNetwork(multicast_out_addr_.c_str(), mreq.imr_interface);
99  if (sts == -1)
100  {
101  TLOG(TLVL_ERROR) << "Unable to resolve hostname for " << multicast_out_addr_;
102  exit(1);
103  }
104  if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
105  {
106  TLOG(TLVL_ERROR) << "Unable to join multicast group, err=" << strerror(errno) ;
107  exit(1);
108  }
109  }
110  TLOG(TLVL_INFO) << "Done setting up request socket, rank=" << my_rank ;
111 }
112 
113 artdaq::RequestReceiver::~RequestReceiver()
114 {
115  stopRequestReceiverThread();
116 }
117 
119 {
120  std::unique_lock<std::mutex> lk(state_mutex_);
121  if (!request_received_)
122  {
123  TLOG(TLVL_ERROR) << "Stop request received by RequestReceiver, but no requests have ever been received." << std::endl
124  << "Check that UDP port " << request_port_ << " is open in the firewall config.";
125  }
126  should_stop_ = true;
127  TLOG(TLVL_DEBUG) << "Joining requestThread";
128  if (requestThread_.joinable()) requestThread_.join();
129  while (running_) usleep(10000);
130 
131  if (request_socket_ != -1) {
132  close(request_socket_);
133  request_socket_ = -1;
134  }
135  request_received_ = false;
136  highest_seen_request_ = 0;
137 }
138 
140 {
141  std::unique_lock<std::mutex> lk(state_mutex_);
142  if (requestThread_.joinable()) requestThread_.join();
143  should_stop_ = false;
144  request_stop_requested_ = false;
145 
146  if (request_socket_ == -1) {
147  TLOG(TLVL_INFO) << "Connecting Request Reception socket";
148  setupRequestListener();
149  }
150 
151  TLOG(TLVL_INFO) << "Starting Request Reception Thread" ;
152  requestThread_ = boost::thread(&RequestReceiver::receiveRequestsLoop, this);
153  running_ = true;
154 }
155 
157 {
158  while (!should_stop_)
159  {
160  TLOG(16) << "receiveRequestsLoop: Polling Request socket for new requests" ;
161 
162  if (request_socket_ == -1)
163  {
164  setupRequestListener();
165  }
166 
167  int ms_to_wait = 10;
168  struct pollfd ufds[1];
169  ufds[0].fd = request_socket_;
170  ufds[0].events = POLLIN | POLLPRI | POLLERR;
171  int rv = poll(ufds, 1, ms_to_wait);
172 
173  // Continue loop if no message received or message does not have correct event ID
174  if (rv <= 0 || (ufds[0].revents != POLLIN && ufds[0].revents != POLLPRI))
175  {
176  if (rv == 1 && (ufds[0].revents == POLLNVAL || ufds[0].revents == POLLERR))
177  {
178  close(request_socket_);
179  request_socket_ = -1;
180  }
181  if (request_stop_requested_ && TimeUtils::GetElapsedTimeMilliseconds(request_stop_timeout_) > end_of_run_timeout_ms_)
182  {
183  break;
184  }
185  continue;
186  }
187 
188  TLOG(11) << "Recieved packet on Request channel" ;
190  auto sts = recv(request_socket_, &hdr_buffer, sizeof(hdr_buffer), 0);
191  if (sts < 0)
192  {
193  TLOG(TLVL_ERROR) << "Error receiving request message header err=" << strerror(errno);
194  close(request_socket_);
195  request_socket_ = -1;
196  continue;
197  }
198  TLOG(11) << "Request header word: 0x" << std::hex << hdr_buffer.header ;
199  if (!hdr_buffer.isValid()) continue;
200 
201  request_received_ = true;
203  {
204  TLOG(TLVL_INFO) << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests..." ;
205  request_stop_timeout_ = std::chrono::steady_clock::now();
206  request_stop_requested_ = true;
207  }
208 
209  std::vector<artdaq::detail::RequestPacket> pkt_buffer(hdr_buffer.packet_count);
210  size_t recvd = 0;
211  while (recvd < sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count)
212  {
213  ssize_t this_recv = recv(request_socket_, reinterpret_cast<uint8_t*>(&pkt_buffer[0]) + recvd, sizeof(artdaq::detail::RequestPacket) * hdr_buffer.packet_count - recvd, 0);
214  if (this_recv < 0)
215  {
216  TLOG(TLVL_ERROR) << "Error receiving request message data err=" << strerror(errno);
217  close(request_socket_);
218  request_socket_ = -1;
219  continue;
220 
221  }
222  recvd += this_recv;
223  }
224  bool anyNew = false;
225 
226  if (should_stop_) break;
227 
228  for (auto& buffer : pkt_buffer)
229  {
230  if (!buffer.isValid()) continue;
231  if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
232  {
233  TLOG(TLVL_ERROR) << "Received conflicting request for SeqID "
234  << std::to_string(buffer.sequence_id) << "!"
235  << " Old ts=" << std::to_string(requests_[buffer.sequence_id])
236  << ", new ts=" << std::to_string(buffer.timestamp) << ". Keeping OLD!" ;
237  }
238  else if (!requests_.count(buffer.sequence_id))
239  {
240  int delta = buffer.sequence_id - highest_seen_request_;
241  TLOG(11) << "Recieved request for sequence ID " << std::to_string(buffer.sequence_id)
242  << " and timestamp " << std::to_string(buffer.timestamp) << " (delta: " << delta << ")" ;
243  if (delta < 0)
244  {
245  TLOG(11) << "Already serviced this request! Ignoring..." ;
246  }
247  else
248  {
249  std::unique_lock<std::mutex> tlk(request_mutex_);
250  requests_[buffer.sequence_id] = buffer.timestamp;
251  request_timing_[buffer.sequence_id] = std::chrono::steady_clock::now();
252  anyNew = true;
253  }
254  }
255  }
256  if (anyNew) {
257  request_cv_.notify_all();
258  }
259  }
260  TLOG(TLVL_DEBUG) << "Ending Request Thread";
261  running_ = false;
262 }
263 
264 void artdaq::RequestReceiver::RemoveRequest(artdaq::Fragment::sequence_id_t reqID)
265 {
266  std::unique_lock<std::mutex> lk(request_mutex_);
267  requests_.erase(reqID);
268  if (reqID > highest_seen_request_ && !should_stop_) // Stop accounting for requests after stop
269  {
270  TLOG(18) << "Setting highest_seen_request_ to " << reqID;
271  highest_seen_request_ = reqID;
272  }
273 
274  if (metricMan)
275  {
276  metricMan->sendMetric("Request Response Time", TimeUtils::GetElapsedTime(request_timing_[reqID]), "seconds", 2, MetricMode::Average);
277  }
278  request_timing_.erase(reqID);
279 }
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:32
End of Run mode (Used to end request processing on receiver)
RequestMessageMode mode
Communicates additional information to the Request receiver.
void startRequestReceiverThread()
Function that launches the data request receiver thread (receiveRequestsLoop())
bool isValid() const
Check the magic bytes of the packet.
int GetInterfaceForNetwork(char const *host_in, in_addr &addr)
Convert an IP address to the network address of the interface sharing the subnet mask.
Definition: TCPConnect.cc:75
Header of a RequestMessage. Contains magic bytes for validation and a count of expected RequestPacket...
uint32_t packet_count
The number of RequestPackets in this Request message.
void setupRequestListener()
Opens the socket used to listen for data requests.
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
The RequestPacket contains information about a single data request.
void stopRequestReceiverThread()
Stop the data request receiver thread (receiveRequestsLoop)