artdaq  v3_09_01
TokenReceiver.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_TokenReceiver").c_str()
3 
4 #include <arpa/inet.h>
5 
6 #include <utility>
7 
8 #include <utility>
10 #include "artdaq/DAQrate/TokenReceiver.hh"
11 
12 artdaq::TokenReceiver::TokenReceiver(const fhicl::ParameterSet& ps, std::shared_ptr<RoutingManagerPolicy> policy,
13  detail::RoutingManagerMode routing_mode, size_t number_of_senders, size_t update_interval_msec)
14  : token_port_(ps.get<int>("routing_token_port", 35555))
15  , policy_(std::move(std::move(policy)))
16  , routing_mode_(routing_mode)
17  , number_of_senders_(number_of_senders)
18  , update_interval_msec_(update_interval_msec)
19  , token_socket_(-1)
20  , token_epoll_fd_(-1)
21  , thread_is_running_(false)
22  , reception_is_paused_(false)
23  , shutdown_requested_(false)
24  , run_number_(0)
25  , statsHelperPtr_(nullptr)
26 {
27  receive_token_events_ = std::vector<epoll_event>(policy_->GetReceiverCount() + 1);
28 }
29 
31 {
32  stopTokenReception(true);
33 }
34 
36 {
37  if (token_thread_.joinable())
38  {
39  token_thread_.join();
40  }
41  boost::thread::attributes attrs;
42  attrs.set_stack_size(4096 * 2000); // 8000 KB
43 
44  reception_is_paused_ = false;
45  shutdown_requested_ = false;
46 
47  TLOG(TLVL_INFO) << "Starting Token Reception Thread";
48  try
49  {
50  token_thread_ = boost::thread(attrs, boost::bind(&TokenReceiver::receiveTokensLoop_, this));
51  }
52  catch (boost::exception const& e)
53  {
54  TLOG(TLVL_ERROR) << "Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
55  std::cerr << "Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
56  exit(3);
57  }
58  received_token_count_ = 0;
59  thread_is_running_ = true;
60  TLOG(TLVL_INFO) << "Started Token Reception Thread";
61 }
62 
64 {
65  shutdown_requested_ = true;
66  reception_is_paused_ = false;
67  if (thread_is_running_)
68  {
69  if (received_token_count_ == 0 && !force)
70  {
71  TLOG(TLVL_DEBUG) << "Stop request received by TokenReceiver, but no tokens have ever been received.";
72  }
73  TLOG(TLVL_DEBUG) << "Joining tokenThread";
74  try
75  {
76  if (token_thread_.joinable())
77  {
78  token_thread_.join();
79  }
80  }
81  catch (...)
82  {
83  // IGNORED
84  }
85  thread_is_running_ = false;
86  }
87 
88  if (token_socket_ != -1)
89  {
90  close(token_socket_);
91  token_socket_ = -1;
92  token_epoll_fd_ = -1;
93  }
94 }
95 
96 void artdaq::TokenReceiver::receiveTokensLoop_()
97 {
98  while (!shutdown_requested_)
99  {
100  TLOG(TLVL_DEBUG) << "Receive Token loop start";
101  if (token_socket_ == -1)
102  {
103  TLOG(TLVL_DEBUG) << "Opening token listener socket";
104  token_socket_ = TCP_listen_fd(token_port_, 3 * sizeof(detail::RoutingToken));
105  fcntl(token_socket_, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
106 
107  if (token_epoll_fd_ != -1)
108  {
109  close(token_epoll_fd_);
110  }
111  struct epoll_event ev;
112  token_epoll_fd_ = epoll_create1(0);
113  ev.events = EPOLLIN | EPOLLPRI;
114  ev.data.fd = token_socket_;
115  if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, token_socket_, &ev) == -1)
116  {
117  TLOG(TLVL_ERROR) << "Could not register listen socket to epoll fd";
118  exit(3);
119  }
120  }
121  if (token_socket_ == -1 || token_epoll_fd_ == -1)
122  {
123  TLOG(TLVL_DEBUG) << "One of the listen sockets was not opened successfully.";
124  return;
125  }
126 
127  auto nfds = epoll_wait(token_epoll_fd_, &receive_token_events_[0], receive_token_events_.size(), update_interval_msec_);
128  if (nfds == -1)
129  {
130  TLOG(TLVL_ERROR) << "Error status received from epoll_wait, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
131  perror("epoll_wait");
132  exit(EXIT_FAILURE);
133  }
134 
135  while (reception_is_paused_ && !shutdown_requested_)
136  {
137  usleep(10000);
138  }
139 
140  TLOG(TLVL_DEBUG) << "Received " << nfds << " events";
141  for (auto n = 0; n < nfds; ++n)
142  {
143  if (receive_token_events_[n].data.fd == token_socket_)
144  {
145  TLOG(TLVL_DEBUG) << "Accepting new connection on token_socket";
146  sockaddr_in addr;
147  socklen_t arglen = sizeof(addr);
148  auto conn_sock = accept(token_socket_, reinterpret_cast<struct sockaddr*>(&addr), &arglen); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
149  fcntl(conn_sock, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
150 
151  if (conn_sock == -1)
152  {
153  TLOG(TLVL_ERROR) << "Error status received from accept, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
154  perror("accept");
155  exit(EXIT_FAILURE);
156  }
157 
158  receive_token_addrs_[conn_sock] = std::string(inet_ntoa(addr.sin_addr));
159  TLOG(TLVL_DEBUG) << "New fd is " << conn_sock << " for data-receiver at " << receive_token_addrs_[conn_sock];
160  struct epoll_event ev;
161  ev.events = EPOLLIN | EPOLLET;
162  ev.data.fd = conn_sock;
163  if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, conn_sock, &ev) == -1)
164  {
165  TLOG(TLVL_ERROR) << "Error status received from epoll_ctl, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
166  perror("epoll_ctl: conn_sock");
167  exit(EXIT_FAILURE);
168  }
169  }
170  else
171  {
172  auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
173  bool reading = true;
174  int sts = 0;
175  while (reading)
176  {
177  detail::RoutingToken buff;
178  sts += read(receive_token_events_[n].data.fd, &buff, sizeof(detail::RoutingToken) - sts);
179  if (sts == 0)
180  {
181  TLOG(TLVL_INFO) << "Received 0-size token from " << receive_token_addrs_[receive_token_events_[n].data.fd];
182  reading = false;
183  }
184  else if (sts < 0 && errno == EAGAIN)
185  {
186  TLOG(TLVL_DEBUG) << "No more tokens from this rank. Continuing poll loop.";
187  reading = false;
188  }
189  else if (sts < 0)
190  {
191  TLOG(TLVL_ERROR) << "Error reading from token socket: sts=" << sts << ", errno=" << errno;
192  receive_token_addrs_.erase(receive_token_events_[n].data.fd);
193  close(receive_token_events_[n].data.fd);
194  epoll_ctl(token_epoll_fd_, EPOLL_CTL_DEL, receive_token_events_[n].data.fd, nullptr);
195  reading = false;
196  }
197  else if (sts == sizeof(detail::RoutingToken) && buff.header != TOKEN_MAGIC)
198  {
199  TLOG(TLVL_ERROR) << "Received invalid token from " << receive_token_addrs_[receive_token_events_[n].data.fd] << " sts=" << sts;
200  reading = false;
201  }
202  else if (sts == sizeof(detail::RoutingToken))
203  {
204  sts = 0;
205  TLOG(TLVL_DEBUG) << "Received token from " << buff.rank << " indicating " << buff.new_slots_free << " slots are free. (run=" << buff.run_number << ")";
206  if (buff.run_number != run_number_)
207  {
208  TLOG(TLVL_DEBUG) << "Received token from a different run number! Current = " << run_number_ << ", token = " << buff.run_number << ", ignoring (n=" << buff.new_slots_free << ")";
209  }
210  else
211  {
212  received_token_count_ += buff.new_slots_free;
214  {
215  policy_->AddReceiverToken(buff.rank, buff.new_slots_free);
216  }
217  else if (routing_mode_ == detail::RoutingManagerMode::RouteBySendCount)
218  {
219  if (received_token_counter_.count(buff.rank) == 0u)
220  {
221  received_token_counter_[buff.rank] = 0;
222  }
223  received_token_counter_[buff.rank] += buff.new_slots_free;
224  TLOG(TLVL_DEBUG) << "RoutingManagerMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] << " tokens for rank " << buff.rank << " and I need " << number_of_senders_ << ".";
225  while (received_token_counter_[buff.rank] >= number_of_senders_)
226  {
227  TLOG(TLVL_DEBUG) << "RoutingManagerMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] << " tokens for rank " << buff.rank << " and I need " << number_of_senders_
228  << "... Sending token to policy";
229  policy_->AddReceiverToken(buff.rank, 1);
230  received_token_counter_[buff.rank] -= number_of_senders_;
231  }
232  }
233  }
234  }
235  }
236  auto delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
237  if (statsHelperPtr_ != nullptr) { statsHelperPtr_->addSample(tokens_received_stat_key_, delta_time); }
238  }
239  }
240  }
241 }
Events should be routed by sequence ID (BR -> EB)
void stopTokenReception(bool force=false)
Stops the reception of event builder tokens.
RoutingManagerMode
Mode indicating whether the RoutingManager is routing events by Sequence ID or by Send Count...
The RoutingToken contains the magic bytes, the rank of the token sender, and the number of slots free...
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
TokenReceiver(const fhicl::ParameterSet &ps, std::shared_ptr< RoutingManagerPolicy > policy, detail::RoutingManagerMode routing_mode, size_t number_of_senders, size_t update_interval_msec)
TokenReceiver Constructor.
void startTokenReception()
Starts the reception of event builder tokens.
Events should be routed by send count (EB -> Agg)
virtual ~TokenReceiver()
TokenReceiver Destructor.