artdaq  v3_09_05
TokenReceiver.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_TokenReceiver").c_str()
3 
4 #include <arpa/inet.h>
5 
6 #include <utility>
7 
8 #include <utility>
10 #include "artdaq/DAQrate/TokenReceiver.hh"
11 
12 artdaq::TokenReceiver::TokenReceiver(const fhicl::ParameterSet& ps, std::shared_ptr<RoutingManagerPolicy> policy,
13  detail::RoutingManagerMode routing_mode, size_t number_of_senders, size_t update_interval_msec)
14  : token_port_(ps.get<int>("routing_token_port", 35555))
15  , policy_(std::move(std::move(policy)))
16  , routing_mode_(routing_mode)
17  , number_of_senders_(number_of_senders)
18  , update_interval_msec_(update_interval_msec)
19  , token_socket_(-1)
20  , token_epoll_fd_(-1)
21  , thread_is_running_(false)
22  , reception_is_paused_(false)
23  , shutdown_requested_(false)
24  , run_number_(0)
25  , statsHelperPtr_(nullptr)
26 {
27  receive_token_events_ = std::vector<epoll_event>(policy_->GetReceiverCount() + 1);
28 }
29 
31 {
32  stopTokenReception(true);
33 }
34 
36 {
37  if (token_thread_.joinable())
38  {
39  token_thread_.join();
40  }
41  boost::thread::attributes attrs;
42  attrs.set_stack_size(4096 * 2000); // 8000 KB
43 
44  reception_is_paused_ = false;
45  shutdown_requested_ = false;
46 
47  TLOG(TLVL_INFO) << "Starting Token Reception Thread";
48  try
49  {
50  token_thread_ = boost::thread(attrs, boost::bind(&TokenReceiver::receiveTokensLoop_, this));
51  char tname[16]; // Size 16 - see man page pthread_setname_np(3) and/or prctl(2)
52  snprintf(tname, sizeof(tname)-1, "%d-TokenRecv", my_rank); // NOLINT
53  tname[sizeof(tname)-1] = '\0'; // assure term. snprintf is not too evil :)
54  auto handle = token_thread_.native_handle();
55  pthread_setname_np(handle, tname);
56  }
57  catch (boost::exception const& e)
58  {
59  TLOG(TLVL_ERROR) << "Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
60  std::cerr << "Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
61  exit(3);
62  }
63  received_token_count_ = 0;
64  thread_is_running_ = true;
65  TLOG(TLVL_INFO) << "Started Token Reception Thread";
66 }
67 
69 {
70  shutdown_requested_ = true;
71  reception_is_paused_ = false;
72  if (thread_is_running_)
73  {
74  if (received_token_count_ == 0 && !force)
75  {
76  TLOG(TLVL_DEBUG) << "Stop request received by TokenReceiver, but no tokens have ever been received.";
77  }
78  TLOG(TLVL_DEBUG) << "Joining tokenThread";
79  try
80  {
81  if (token_thread_.joinable())
82  {
83  token_thread_.join();
84  }
85  }
86  catch (...)
87  {
88  // IGNORED
89  }
90  thread_is_running_ = false;
91  }
92 
93  if (token_socket_ != -1)
94  {
95  close(token_socket_);
96  token_socket_ = -1;
97  token_epoll_fd_ = -1;
98  }
99 }
100 
101 void artdaq::TokenReceiver::receiveTokensLoop_()
102 {
103  while (!shutdown_requested_)
104  {
105  TLOG(TLVL_DEBUG) << "Receive Token loop start";
106  if (token_socket_ == -1)
107  {
108  TLOG(TLVL_DEBUG) << "Opening token listener socket";
109  token_socket_ = TCP_listen_fd(token_port_, 3 * sizeof(detail::RoutingToken));
110  fcntl(token_socket_, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
111 
112  if (token_epoll_fd_ != -1)
113  {
114  close(token_epoll_fd_);
115  }
116  struct epoll_event ev;
117  token_epoll_fd_ = epoll_create1(0);
118  ev.events = EPOLLIN | EPOLLPRI;
119  ev.data.fd = token_socket_;
120  if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, token_socket_, &ev) == -1)
121  {
122  TLOG(TLVL_ERROR) << "Could not register listen socket to epoll fd";
123  exit(3);
124  }
125  }
126  if (token_socket_ == -1 || token_epoll_fd_ == -1)
127  {
128  TLOG(TLVL_DEBUG) << "One of the listen sockets was not opened successfully.";
129  return;
130  }
131 
132  auto nfds = epoll_wait(token_epoll_fd_, &receive_token_events_[0], receive_token_events_.size(), update_interval_msec_);
133  if (nfds == -1)
134  {
135  TLOG(TLVL_ERROR) << "Error status received from epoll_wait, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
136  perror("epoll_wait");
137  exit(EXIT_FAILURE);
138  }
139 
140  while (reception_is_paused_ && !shutdown_requested_)
141  {
142  usleep(10000);
143  }
144 
145  TLOG(TLVL_DEBUG) << "Received " << nfds << " events";
146  for (auto n = 0; n < nfds; ++n)
147  {
148  if (receive_token_events_[n].data.fd == token_socket_)
149  {
150  TLOG(TLVL_DEBUG) << "Accepting new connection on token_socket";
151  sockaddr_in addr;
152  socklen_t arglen = sizeof(addr);
153  auto conn_sock = accept(token_socket_, reinterpret_cast<struct sockaddr*>(&addr), &arglen); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
154  fcntl(conn_sock, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
155 
156  if (conn_sock == -1)
157  {
158  TLOG(TLVL_ERROR) << "Error status received from accept, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
159  perror("accept");
160  exit(EXIT_FAILURE);
161  }
162 
163  receive_token_addrs_[conn_sock] = std::string(inet_ntoa(addr.sin_addr));
164  TLOG(TLVL_DEBUG) << "New fd is " << conn_sock << " for data-receiver at " << receive_token_addrs_[conn_sock];
165  struct epoll_event ev;
166  ev.events = EPOLLIN | EPOLLET;
167  ev.data.fd = conn_sock;
168  if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, conn_sock, &ev) == -1)
169  {
170  TLOG(TLVL_ERROR) << "Error status received from epoll_ctl, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
171  perror("epoll_ctl: conn_sock");
172  exit(EXIT_FAILURE);
173  }
174  }
175  else
176  {
177  auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
178  bool reading = true;
179  int sts = 0;
180  while (reading)
181  {
182  detail::RoutingToken buff;
183  sts += read(receive_token_events_[n].data.fd, &buff, sizeof(detail::RoutingToken) - sts);
184  if (sts == 0)
185  {
186  TLOG(TLVL_INFO) << "Received 0-size token from " << receive_token_addrs_[receive_token_events_[n].data.fd];
187  reading = false;
188  }
189  else if (sts < 0 && errno == EAGAIN)
190  {
191  TLOG(TLVL_DEBUG) << "No more tokens from this rank. Continuing poll loop.";
192  reading = false;
193  }
194  else if (sts < 0)
195  {
196  TLOG(TLVL_ERROR) << "Error reading from token socket: sts=" << sts << ", errno=" << errno;
197  receive_token_addrs_.erase(receive_token_events_[n].data.fd);
198  close(receive_token_events_[n].data.fd);
199  epoll_ctl(token_epoll_fd_, EPOLL_CTL_DEL, receive_token_events_[n].data.fd, nullptr);
200  reading = false;
201  }
202  else if (sts == sizeof(detail::RoutingToken) && buff.header != TOKEN_MAGIC)
203  {
204  TLOG(TLVL_ERROR) << "Received invalid token from " << receive_token_addrs_[receive_token_events_[n].data.fd] << " sts=" << sts;
205  reading = false;
206  }
207  else if (sts == sizeof(detail::RoutingToken))
208  {
209  sts = 0;
210  TLOG(TLVL_DEBUG) << "Received token from " << buff.rank << " indicating " << buff.new_slots_free << " slots are free. (run=" << buff.run_number << ")";
211  if (buff.run_number != run_number_)
212  {
213  TLOG(TLVL_DEBUG) << "Received token from a different run number! Current = " << run_number_ << ", token = " << buff.run_number << ", ignoring (n=" << buff.new_slots_free << ")";
214  }
215  else
216  {
217  received_token_count_ += buff.new_slots_free;
219  {
220  policy_->AddReceiverToken(buff.rank, buff.new_slots_free);
221  }
222  else if (routing_mode_ == detail::RoutingManagerMode::RouteBySendCount)
223  {
224  if (received_token_counter_.count(buff.rank) == 0u)
225  {
226  received_token_counter_[buff.rank] = 0;
227  }
228  received_token_counter_[buff.rank] += buff.new_slots_free;
229  TLOG(TLVL_DEBUG) << "RoutingManagerMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] << " tokens for rank " << buff.rank << " and I need " << number_of_senders_ << ".";
230  while (received_token_counter_[buff.rank] >= number_of_senders_)
231  {
232  TLOG(TLVL_DEBUG) << "RoutingManagerMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] << " tokens for rank " << buff.rank << " and I need " << number_of_senders_
233  << "... Sending token to policy";
234  policy_->AddReceiverToken(buff.rank, 1);
235  received_token_counter_[buff.rank] -= number_of_senders_;
236  }
237  }
238  }
239  }
240  }
241  auto delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
242  if (statsHelperPtr_ != nullptr) { statsHelperPtr_->addSample(tokens_received_stat_key_, delta_time); }
243  }
244  }
245  }
246 }
Events should be routed by sequence ID (BR -> EB)
void stopTokenReception(bool force=false)
Stops the reception of event builder tokens.
RoutingManagerMode
Mode indicating whether the RoutingManager is routing events by Sequence ID or by Send Count...
The RoutingToken contains the magic bytes, the rank of the token sender, and the number of slots free...
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
TokenReceiver(const fhicl::ParameterSet &ps, std::shared_ptr< RoutingManagerPolicy > policy, detail::RoutingManagerMode routing_mode, size_t number_of_senders, size_t update_interval_msec)
TokenReceiver Constructor.
void startTokenReception()
Starts the reception of event builder tokens.
Events should be routed by send count (EB -> Agg)
virtual ~TokenReceiver()
TokenReceiver Destructor.