artdaq  v2_03_00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
routing_master.cc
#include "MPIProg.hh"
#include "artdaq/Application/Routing/RoutingPacket.hh"
#include "cetlib/filepath_maker.h"
#include "fhiclcpp/ParameterSet.h"
#include "fhiclcpp/make_ParameterSet.h"

#include "boost/program_options.hpp"
#include "boost/filesystem.hpp"
#include <artdaq/Application/RoutingMasterCore.hh>
#include <artdaq/Application/RoutingMasterApp.hh>
#include <netdb.h>
namespace bpo = boost::program_options;

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

extern "C"
{
#include <unistd.h>
}

#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/socket.h>

extern "C"
{
#include <sys/time.h>
#include <sys/resource.h>
}
38 
39 class LockFile
40 {
41 public:
42  explicit LockFile(std::string path) : fileName_(path)
43  {
44  std::ofstream fstream(fileName_);
45  fstream << "Locked" << std::endl;
46  }
47  ~LockFile()
48  {
49  if(IsLocked(fileName_)) remove(fileName_.c_str());
50  }
51  static bool IsLocked(std::string path)
52  {
53  return boost::filesystem::exists(path);
54  }
55 
56 private:
57  std::string fileName_;
58 };
59 
63 class RoutingMasterTest : public MPIProg
64 {
65 public:
80  RoutingMasterTest(int argc, char* argv[]);
81 
85  void go();
86 
90  void generate_tokens();
91 
95  void routing_master();
96 
100  void table_receiver();
101 
108  fhicl::ParameterSet getPset(int argc, char* argv[]) const;
109 
110 private:
111  enum class TestRole_t : int
112  {
113  TOKEN_GEN = 0,
114  ROUTING_MASTER = 1,
115  TABLE_RECEIVER = 2
116  };
117 
118  void printHost(const std::string& functionName) const;
119 
120  fhicl::ParameterSet const pset_;
121  fhicl::ParameterSet const daq_pset_;
122  MPI_Comm local_group_comm_;
123  TestRole_t role_;
124 
125  std::string routing_master_address_;
126  std::string multicast_address_;
127  int token_port_;
128  int table_port_;
129  int ack_port_;
130  std::vector<int> eb_ranks_;
131  int token_count_;
132  size_t token_interval_us_;
133 };
134 
135 RoutingMasterTest::RoutingMasterTest(int argc, char* argv[]) :
136  MPIProg(argc, argv)
137  , pset_(getPset(argc, argv))
138  , daq_pset_(pset_.get<fhicl::ParameterSet>("daq"))
139  , local_group_comm_()
140  , routing_master_address_(daq_pset_.get<std::string>("routing_master_hostname", "localhost"))
141  , multicast_address_(daq_pset_.get<std::string>("table_update_address", "227.128.12.28"))
142  , token_port_(daq_pset_.get<int>("routing_token_port", 35555))
143  , table_port_(daq_pset_.get<int>("table_update_port", 35556))
144  , ack_port_(daq_pset_.get<int>("table_acknowledge_port", 35557))
145  , token_count_(pset_.get<int>("token_count", 1000))
146  , token_interval_us_(pset_.get<size_t>("token_interval_us", 5000))
147 {
148  assert(!(my_rank < 0));
149  switch (my_rank)
150  {
151  case 0:
152  role_ = TestRole_t::TOKEN_GEN;
153  break;
154  case 1:
155  role_ = TestRole_t::ROUTING_MASTER;
156  break;
157  default:
158  role_ = TestRole_t::TABLE_RECEIVER;
159  break;
160  }
161  auto policy_pset = daq_pset_.get<fhicl::ParameterSet>("policy");
162  eb_ranks_ = policy_pset.get<std::vector<int>>("receiver_ranks");
163 
164 }
165 
166 fhicl::ParameterSet RoutingMasterTest::getPset(int argc, char* argv[]) const
167 {
168  std::ostringstream descstr;
169  descstr << "-- <-c <config-file>>";
170  bpo::options_description desc(descstr.str());
171  desc.add_options()
172  ("config,c", bpo::value<std::string>(), "Configuration file.");
173  bpo::variables_map vm;
174  try
175  {
176  bpo::store(bpo::command_line_parser(argc, argv).
177  options(desc).allow_unregistered().run(), vm);
178  bpo::notify(vm);
179  }
180  catch (bpo::error const& e)
181  {
182  std::cerr << "Exception from command line processing in Config::getArtPset: " << e.what() << "\n";
183  throw "cmdline parsing error.";
184  }
185  if (!vm.count("config"))
186  {
187  std::cerr << "Expected \"-- -c <config-file>\" fhicl file specification.\n";
188  throw "cmdline parsing error.";
189  }
190  fhicl::ParameterSet pset;
191  cet::filepath_lookup lookup_policy("FHICL_FILE_PATH");
192  fhicl::make_ParameterSet(vm["config"].as<std::string>(), lookup_policy, pset);
193 
194  return pset;
195 }
196 
198 {
199  if (LockFile::IsLocked("/tmp/routing_master_t.lock")) return;
200  MPI_Barrier(MPI_COMM_WORLD);
201  std::unique_ptr<LockFile> lock;
202  if (my_rank == 0) {
203  lock = std::make_unique<LockFile>("/tmp/routing_master_t.lock");
204  }
205  //std::cout << "daq_pset_: " << daq_pset_.to_string() << std::endl << "conf_.makeParameterSet(): " << conf_.makeParameterSet().to_string() << std::endl;
206  MPI_Comm_split(MPI_COMM_WORLD, static_cast<int>(role_), 0, &local_group_comm_);
207  switch (role_)
208  {
209  case TestRole_t::TABLE_RECEIVER:
210  table_receiver();
211  break;
212  case TestRole_t::ROUTING_MASTER:
213  routing_master();
214  break;
215  case TestRole_t::TOKEN_GEN:
216  generate_tokens();
217  break;
218  default:
219  throw "No such node type";
220  }
221  TLOG_DEBUG("routing_master") << "Rank " << my_rank << " complete." << TLOG_ENDL;
222 }
223 
225 {
226  TLOG_DEBUG("generate_tokens") << "Init" << TLOG_ENDL;
227  printHost("generate_tokens");
228  sleep(1);
229 
230  int token_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
231  if (!token_socket)
232  {
233  TLOG_ERROR("generate_tokens") << "I failed to create the socket for sending Routing Tokens!" << TLOG_ENDL;
234  exit(1);
235  }
236  struct sockaddr_in token_addr;
237  auto sts = ResolveHost(routing_master_address_.c_str(), token_port_, token_addr);
238  if(sts == -1)
239  {
240  TLOG_ERROR("generate_tokens") << "Could not resolve host name" << TLOG_ENDL;
241  }
242 
243  connect(token_socket, (struct sockaddr*)&token_addr, sizeof(token_addr));
244 
245  int sent_tokens = 0;
246  std::map<int, int> token_counter;
247  for(auto rank : eb_ranks_)
248  {
249  token_counter[rank] = 0;
250  }
251  while (sent_tokens < token_count_) {
252  int this_rank = eb_ranks_[rand() % eb_ranks_.size()];
253  token_counter[this_rank]++;
255  token.header = TOKEN_MAGIC;
256  token.rank = this_rank;
257  token.new_slots_free = 1;
258 
259  TLOG_DEBUG("generate_tokens") << "Sending RoutingToken " << std::to_string(++sent_tokens) << " for rank " << this_rank << " to " << routing_master_address_ << TLOG_ENDL;
260  send(token_socket, &token, sizeof(artdaq::detail::RoutingToken), 0);
261  usleep(token_interval_us_);
262  }
263  auto max_rank = 0;
264  for(auto rank : token_counter)
265  {
266  if (rank.second > max_rank) max_rank = rank.second;
267  }
268  for(auto rank : token_counter)
269  {
271  token.header = TOKEN_MAGIC;
272  token.rank = rank.first;
273  token.new_slots_free = max_rank - rank.second;
274 
275  TLOG_DEBUG("generate_tokens") << "Sending RoutingToken " << std::to_string(++sent_tokens) << " for rank " << rank.first << " to " << routing_master_address_ << TLOG_ENDL;
276  send(token_socket, &token, sizeof(artdaq::detail::RoutingToken), 0);
277  usleep(token_interval_us_);
278 
279  }
280 
281  MPI_Comm_free(&local_group_comm_);
282  TLOG_INFO("generate_tokens") << "Waiting at MPI_Barrier" << TLOG_ENDL;
283  MPI_Barrier(MPI_COMM_WORLD);
284  TLOG_INFO("generate_tokens") << "Done with MPI_Barrier" << TLOG_ENDL;
285 }
286 
288 {
289  TLOG_DEBUG("table_receiver") << "Init" << TLOG_ENDL;
290  printHost("table_receiver");
291 
292 
293  auto table_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
294  if (!table_socket)
295  {
296  TLOG_ERROR("table_receiver") << "Error creating socket for receiving data requests!" << TLOG_ENDL;
297  exit(1);
298  }
299 
300  struct sockaddr_in si_me_request;
301 
302  int yes = 1;
303  if (setsockopt(table_socket, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
304  {
305  TLOG_ERROR("table_receiver") << " Unable to enable port reuse on request socket" << TLOG_ENDL;
306  exit(1);
307  }
308  memset(&si_me_request, 0, sizeof(si_me_request));
309  si_me_request.sin_family = AF_INET;
310  si_me_request.sin_port = htons(table_port_);
311  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
312  if (bind(table_socket, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
313  {
314  TLOG_ERROR("table_receiver") << "Cannot bind request socket to port " << table_port_ << TLOG_ENDL;
315  exit(1);
316  }
317 
318  struct ip_mreq mreq;
319  long int sts = ResolveHost(multicast_address_.c_str(), mreq.imr_multiaddr);
320  if(sts == -1)
321  {
322  TLOG_ERROR("table_Receiver") << "Unable to resolve multicast hostname" << TLOG_ENDL;
323  exit(1);
324  }
325  mreq.imr_interface.s_addr = htonl(INADDR_ANY);
326  if (setsockopt(table_socket, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
327  {
328  TLOG_ERROR("table_receiver") << "Unable to join multicast group" << TLOG_ENDL;
329  exit(1);
330  }
331 
332  struct epoll_event ev;
333  int table_epoll_fd = epoll_create1(0);
334  ev.events = EPOLLIN | EPOLLPRI;
335  ev.data.fd = table_socket;
336  if (epoll_ctl(table_epoll_fd, EPOLL_CTL_ADD, table_socket, &ev) == -1)
337  {
338  TLOG_ERROR("table_receiver") << "Could not register listen socket to epoll fd" << TLOG_ENDL;
339  exit(3);
340  }
341 
342  auto ack_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
343  struct sockaddr_in ack_addr;
344  sts = ResolveHost(routing_master_address_.c_str(), ack_port_, ack_addr);
345  if(sts == -1)
346  {
347  TLOG_ERROR("table_Receiver") << "Unable to resolve routing master hostname" << TLOG_ENDL;
348  exit(1);
349  }
350 
351  if (table_socket == -1 || table_epoll_fd == -1 || ack_socket == -1)
352  {
353  TLOG_DEBUG("table_receiver") << "One of the listen sockets was not opened successfully." << TLOG_ENDL;
354  exit(4);
355  }
356  artdaq::Fragment::sequence_id_t max_sequence_id = token_count_;
357  artdaq::Fragment::sequence_id_t current_sequence_id = 0;
358  std::map<artdaq::Fragment::sequence_id_t, int> routing_table;
359  TLOG_INFO("table_receiver") << "Expecting " << std::to_string(max_sequence_id) << " as the last Sequence ID in this run" << TLOG_ENDL;
360  while (current_sequence_id < max_sequence_id)
361  {
362  std::vector<epoll_event> table_events_(4);
363  TLOG_DEBUG("table_receiver") << "Waiting for event on table socket" << TLOG_ENDL;
364  auto nfds = epoll_wait(table_epoll_fd, &table_events_[0], table_events_.size(), -1);
365  if (nfds == -1) {
366  perror("epoll_wait");
367  exit(EXIT_FAILURE);
368  }
369 
370  TLOG_DEBUG("table_receiver") << "Received " << nfds << " table update(s)" << TLOG_ENDL;
371  for (auto n = 0; n < nfds; ++n) {
372  auto first = artdaq::Fragment::InvalidSequenceID;
373  auto last = artdaq::Fragment::InvalidSequenceID;
375  recv(table_events_[n].data.fd, &hdr, sizeof(artdaq::detail::RoutingPacketHeader), 0);
376 
377  TLOG_DEBUG("table_receiver") << "Checking for valid header" << TLOG_ENDL;
378  if (hdr.header == ROUTING_MAGIC) {
380  TLOG_DEBUG("table_receiver") << "Receiving data buffer" << TLOG_ENDL;
381  sts = recv(table_events_[n].data.fd, &buffer[0], sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries, 0);
382  assert(static_cast<size_t>(sts) == sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries);
383 
384  first = buffer[0].sequence_id;
385  last = buffer[buffer.size() - 1].sequence_id;
386 
387  for (auto entry : buffer)
388  {
389  if (routing_table.count(entry.sequence_id))
390  {
391  assert(routing_table[entry.sequence_id] == entry.destination_rank);
392  continue;
393  }
394  routing_table[entry.sequence_id] = entry.destination_rank;
395  TLOG_DEBUG("table_receiver") << "table_receiver " << std::to_string(my_rank) << ": received update: SeqID " << std::to_string(entry.sequence_id) << " -> Rank " << std::to_string(entry.destination_rank) << TLOG_ENDL;
396  }
397 
399  ack.rank = my_rank;
400  ack.first_sequence_id = first;
401  ack.last_sequence_id = last;
402 
403  TLOG_DEBUG("table_receiver") << "Sending RoutingAckPacket with first= " << std::to_string(first) << " and last= " << std::to_string(last) << " to " << routing_master_address_ << ", port " << ack_port_ << TLOG_ENDL
404  sendto(ack_socket, &ack, sizeof(artdaq::detail::RoutingAckPacket), 0, (struct sockaddr *)&ack_addr, sizeof(ack_addr));
405  current_sequence_id = last;
406  }
407  }
408  }
409 
410  MPI_Comm_free(&local_group_comm_);
411  TLOG_INFO("table_receiver") << "Waiting at MPI_Barrier" << TLOG_ENDL;
412  MPI_Barrier(MPI_COMM_WORLD);
413  TLOG_INFO("table_receiver") << "Done with MPI_Barrier" << TLOG_ENDL;
414 }
415 
417 {
418  TLOG_DEBUG("routing_master") << "Init" << TLOG_ENDL;
419  printHost("routing_master");
420 
421  auto app = std::make_unique<artdaq::RoutingMasterApp>(local_group_comm_, "RoutingMaster");
422 
423  app->initialize(pset_, 0, 0);
424  app->do_start(art::RunID(1), 0, 0);
425  TLOG_INFO("routing_master") << "Waiting at MPI_Barrier" << TLOG_ENDL;
426  MPI_Barrier(MPI_COMM_WORLD);
427  TLOG_INFO("routing_master") << "Done with MPI_Barrier, calling RoutingMasterCore::stop" << TLOG_ENDL;
428  app->do_stop(0, 0);
429  TLOG_INFO("routing_master") << "Done with RoutingMasterCore::stop, calling shutdown" << TLOG_ENDL;
430  app->do_shutdown(0);
431  TLOG_INFO("routing_master") << "Done with RoutingMasterCore::shutdown" << TLOG_ENDL;
432  MPI_Comm_free(&local_group_comm_);
433 }
434 
435 void RoutingMasterTest::printHost(const std::string& functionName) const
436 {
437  char* doPrint = getenv("PRINT_HOST");
438  if (doPrint == 0) { return; }
439  const int ARRSIZE = 80;
440  char hostname[ARRSIZE];
441  std::string hostString;
442  if (!gethostname(hostname, ARRSIZE))
443  {
444  hostString = hostname;
445  }
446  else
447  {
448  hostString = "unknown";
449  }
450  TLOG_DEBUG("routing_master") << "Running " << functionName
451  << " on host " << hostString
452  << " with rank " << my_rank << "."
453  << TLOG_ENDL;
454 }
455 
456 void printUsage()
457 {
458  int myid = 0;
459  struct rusage usage;
460  getrusage(RUSAGE_SELF, &usage);
461  std::cout << myid << ":"
462  << " user=" << artdaq::Globals::timevalAsDouble(usage.ru_utime)
463  << " sys=" << artdaq::Globals::timevalAsDouble(usage.ru_stime)
464  << std::endl;
465 }
466 
467 int main(int argc, char* argv[])
468 {
469  artdaq::configureMessageFacility("routing_master", false);
470  int rc = 1;
471  try
472  {
473  RoutingMasterTest p(argc, argv);
474  std::cerr << "Started process " << my_rank << " of " << p.procs_ << ".\n";
475  p.go();
476  rc = 0;
477  }
478  catch (std::string& x)
479  {
480  std::cerr << "Exception (type string) caught in routing_master: "
481  << x
482  << '\n';
483  return 1;
484  }
485  catch (char const* m)
486  {
487  std::cerr << "Exception (type char const*) caught in routing_master: ";
488  if (m)
489  {
490  std::cerr << m;
491  }
492  else
493  {
494  std::cerr << "[the value was a null pointer, so no message is available]";
495  }
496  std::cerr << '\n';
497  }
498  return rc;
499 }
The RoutingMasterTest class runs the routing_master test.
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:29
A row of the Routing Table.
void routing_master()
Load a RoutingMasterCore instance, receive tokens from the token generators, and send table updates t...
A wrapper for an MPI program. Similar to MPISentry.
Definition: MPIProg.hh:10
A RoutingAckPacket contains the rank of the table receiver, plus the first and last sequence IDs in t...
void configureMessageFacility(char const *progname, bool useConsole=true)
Configure and start the message facility. Provide the program name so that messages will be appropria...
void go()
Start the test, using the role assigned.
unsigned new_slots_free
The number of slots free in the token sender (usually 1)
The RoutingToken contains the magic bytes, the rank of the token sender, and the number of slots free...
RoutingMasterTest(int argc, char *argv[])
RoutingMasterTest Constructor.
void table_receiver()
Receive Routing Tables from the Routing Master and send acknowledgement packets back.
Fragment::sequence_id_t first_sequence_id
The first sequence ID in the received RoutingPacket.
fhicl::ParameterSet getPset(int argc, char *argv[]) const
Parse the command line arguments and load a configuration FHiCL file.
The header of the Routing Table, containing the magic bytes and the number of entries.
uint32_t header
Magic bytes to make sure the packet wasn't garbled.
Fragment::sequence_id_t last_sequence_id
The last sequence ID in the received RoutingPacket.
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
int rank
The rank from which the RoutingToken came.
int rank
The rank from which the RoutingAckPacket came.
size_t nEntries
The number of RoutingPacketEntries in the RoutingPacket.
void generate_tokens()
Generate tokens and send them to the Routing Master.
static double timevalAsDouble(struct timeval tv)
Convert a timeval value to a double.
Definition: Globals.cc:6
uint32_t header
The magic bytes that help validate the RoutingToken.