artdaq  v3_08_00
TokenReceiver.cc
1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_TokenReceiver").c_str()
3 
4 #include <arpa/inet.h>
6 #include "artdaq/DAQrate/TokenReceiver.hh"
7 
8 artdaq::TokenReceiver::TokenReceiver(const fhicl::ParameterSet& ps, std::shared_ptr<RoutingMasterPolicy> policy,
9  detail::RoutingMasterMode routing_mode, size_t number_of_senders, size_t update_interval_msec)
10  : token_port_(ps.get<int>("routing_token_port", 35555))
11  , policy_(policy)
12  , routing_mode_(routing_mode)
13  , number_of_senders_(number_of_senders)
14  , update_interval_msec_(update_interval_msec)
15  , token_socket_(-1)
16  , token_epoll_fd_(-1)
17  , thread_is_running_(false)
18  , reception_is_paused_(false)
19  , shutdown_requested_(false)
20  , run_number_(0)
21  , received_token_counter_()
22  , statsHelperPtr_(nullptr)
23 {
24  receive_token_events_ = std::vector<epoll_event>(policy_->GetReceiverCount() + 1);
25 }
26 
28 {
29  stopTokenReception(true);
30 }
31 
33 {
34  if (token_thread_.joinable()) token_thread_.join();
35  boost::thread::attributes attrs;
36  attrs.set_stack_size(4096 * 2000); // 8000 KB
37 
38  reception_is_paused_ = false;
39  shutdown_requested_ = false;
40 
41  TLOG(TLVL_INFO) << "Starting Token Reception Thread";
42  try
43  {
44  token_thread_ = boost::thread(attrs, boost::bind(&TokenReceiver::receiveTokensLoop_, this));
45  }
46  catch (boost::exception const& e)
47  {
48  TLOG(TLVL_ERROR) << "Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) << ", errno=" << errno;
49  std::cerr << "Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) << ", errno=" << errno << std::endl;
50  exit(3);
51  }
52  received_token_count_ = 0;
53  thread_is_running_ = true;
54  TLOG(TLVL_INFO) << "Started Token Reception Thread";
55 }
56 
58 {
59  shutdown_requested_ = true;
60  reception_is_paused_ = false;
61  if (thread_is_running_)
62  {
63  if (received_token_count_ == 0 && !force)
64  {
65  TLOG(TLVL_DEBUG) << "Stop request received by TokenReceiver, but no tokens have ever been received.";
66  }
67  TLOG(TLVL_DEBUG) << "Joining tokenThread";
68  if (token_thread_.joinable()) token_thread_.join();
69  thread_is_running_ = false;
70  }
71 
72  if (token_socket_ != -1)
73  {
74  close(token_socket_);
75  token_socket_ = -1;
76  token_epoll_fd_ = -1;
77  }
78 }
79 
80 void artdaq::TokenReceiver::receiveTokensLoop_()
81 {
82  while (!shutdown_requested_)
83  {
84  TLOG(TLVL_DEBUG) << "Receive Token loop start";
85  if (token_socket_ == -1)
86  {
87  TLOG(TLVL_DEBUG) << "Opening token listener socket";
88  token_socket_ = TCP_listen_fd(token_port_, 3 * sizeof(detail::RoutingToken));
89  fcntl(token_socket_, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
90 
91  if (token_epoll_fd_ != -1) close(token_epoll_fd_);
92  struct epoll_event ev;
93  token_epoll_fd_ = epoll_create1(0);
94  ev.events = EPOLLIN | EPOLLPRI;
95  ev.data.fd = token_socket_;
96  if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, token_socket_, &ev) == -1)
97  {
98  TLOG(TLVL_ERROR) << "Could not register listen socket to epoll fd";
99  exit(3);
100  }
101  }
102  if (token_socket_ == -1 || token_epoll_fd_ == -1)
103  {
104  TLOG(TLVL_DEBUG) << "One of the listen sockets was not opened successfully.";
105  return;
106  }
107 
108  auto nfds = epoll_wait(token_epoll_fd_, &receive_token_events_[0], receive_token_events_.size(), update_interval_msec_);
109  if (nfds == -1)
110  {
111  TLOG(TLVL_ERROR) << "Error status received from epoll_wait, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
112  perror("epoll_wait");
113  exit(EXIT_FAILURE);
114  }
115 
116  while (reception_is_paused_ && !shutdown_requested_)
117  {
118  usleep(10000);
119  }
120 
121  TLOG(TLVL_DEBUG) << "Received " << nfds << " events";
122  for (auto n = 0; n < nfds; ++n)
123  {
124  if (receive_token_events_[n].data.fd == token_socket_)
125  {
126  TLOG(TLVL_DEBUG) << "Accepting new connection on token_socket";
127  sockaddr_in addr;
128  socklen_t arglen = sizeof(addr);
129  auto conn_sock = accept(token_socket_, (struct sockaddr*)&addr, &arglen);
130  fcntl(conn_sock, F_SETFL, O_NONBLOCK); // set O_NONBLOCK
131 
132  if (conn_sock == -1)
133  {
134  TLOG(TLVL_ERROR) << "Error status received from accept, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
135  perror("accept");
136  exit(EXIT_FAILURE);
137  }
138 
139  receive_token_addrs_[conn_sock] = std::string(inet_ntoa(addr.sin_addr));
140  TLOG(TLVL_DEBUG) << "New fd is " << conn_sock << " for data-receiver at " << receive_token_addrs_[conn_sock];
141  struct epoll_event ev;
142  ev.events = EPOLLIN | EPOLLET;
143  ev.data.fd = conn_sock;
144  if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, conn_sock, &ev) == -1)
145  {
146  TLOG(TLVL_ERROR) << "Error status received from epoll_ctl, exiting with code " << EXIT_FAILURE << ", errno=" << errno << " (" << strerror(errno) << ")";
147  perror("epoll_ctl: conn_sock");
148  exit(EXIT_FAILURE);
149  }
150  }
151  else
152  {
153  auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
154  bool reading = true;
155  int sts = 0;
156  while (reading)
157  {
158  detail::RoutingToken buff;
159  sts += read(receive_token_events_[n].data.fd, &buff, sizeof(detail::RoutingToken) - sts);
160  if (sts == 0)
161  {
162  TLOG(TLVL_INFO) << "Received 0-size token from " << receive_token_addrs_[receive_token_events_[n].data.fd];
163  reading = false;
164  }
165  else if (sts < 0 && errno == EAGAIN)
166  {
167  TLOG(TLVL_DEBUG) << "No more tokens from this rank. Continuing poll loop.";
168  reading = false;
169  }
170  else if (sts < 0)
171  {
172  TLOG(TLVL_ERROR) << "Error reading from token socket: sts=" << sts << ", errno=" << errno;
173  receive_token_addrs_.erase(receive_token_events_[n].data.fd);
174  close(receive_token_events_[n].data.fd);
175  epoll_ctl(token_epoll_fd_, EPOLL_CTL_DEL, receive_token_events_[n].data.fd, NULL);
176  reading = false;
177  }
178  else if (sts == sizeof(detail::RoutingToken) && buff.header != TOKEN_MAGIC)
179  {
180  TLOG(TLVL_ERROR) << "Received invalid token from " << receive_token_addrs_[receive_token_events_[n].data.fd] << " sts=" << sts;
181  reading = false;
182  }
183  else if (sts == sizeof(detail::RoutingToken))
184  {
185  sts = 0;
186  TLOG(TLVL_DEBUG) << "Received token from " << buff.rank << " indicating " << buff.new_slots_free << " slots are free. (run=" << buff.run_number << ")";
187  if (buff.run_number != run_number_)
188  {
189  TLOG(TLVL_DEBUG) << "Received token from a different run number! Current = " << run_number_ << ", token = " << buff.run_number << ", ignoring (n=" << buff.new_slots_free << ")";
190  }
191  else
192  {
193  received_token_count_ += buff.new_slots_free;
195  {
196  policy_->AddReceiverToken(buff.rank, buff.new_slots_free);
197  }
198  else if (routing_mode_ == detail::RoutingMasterMode::RouteBySendCount)
199  {
200  if (!received_token_counter_.count(buff.rank)) received_token_counter_[buff.rank] = 0;
201  received_token_counter_[buff.rank] += buff.new_slots_free;
202  TLOG(TLVL_DEBUG) << "RoutingMasterMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] << " tokens for rank " << buff.rank << " and I need " << number_of_senders_ << ".";
203  while (received_token_counter_[buff.rank] >= number_of_senders_)
204  {
205  TLOG(TLVL_DEBUG) << "RoutingMasterMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] << " tokens for rank " << buff.rank << " and I need " << number_of_senders_
206  << "... Sending token to policy";
207  policy_->AddReceiverToken(buff.rank, 1);
208  received_token_counter_[buff.rank] -= number_of_senders_;
209  }
210  }
211  }
212  }
213  }
214  auto delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
215  if (statsHelperPtr_.get() != nullptr) { statsHelperPtr_->addSample(tokens_received_stat_key_, delta_time); }
216  }
217  }
218  }
219 }
RoutingMasterMode
Mode indicating whether the RoutingMaster is routing events by Sequence ID or by Send Count...
void stopTokenReception(bool force=false)
Stops the reception of event builder tokens.
TokenReceiver(const fhicl::ParameterSet &ps, std::shared_ptr< RoutingMasterPolicy > policy, detail::RoutingMasterMode routing_mode, size_t number_of_senders, size_t update_interval_msec)
TokenReceiver Constructor.
Definition: TokenReceiver.cc:8
Events should be routed by sequence ID (BR -> EB)
The RoutingToken contains the magic bytes, the rank of the token sender, and the number of slots free...
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
void startTokenReception()
Starts the reception of event builder tokens.
virtual ~TokenReceiver()
TokenReceiver Destructor.
Events should be routed by send count (EB -> Agg)