#define TRACE_NAME "routing_master_t"

#include "artdaq/DAQdata/TCPConnect.hh"
#include "artdaq/DAQrate/detail/RoutingPacket.hh"
#include "cetlib/filepath_maker.h"
#include "fhiclcpp/ParameterSet.h"
#include "fhiclcpp/make_ParameterSet.h"

#include <boost/program_options.hpp>
#include "artdaq/Application/RoutingMasterApp.hh"
#include "artdaq/Application/RoutingMasterCore.hh"
namespace bpo = boost::program_options;

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <sys/resource.h>
	fhicl::ParameterSet getPset(int argc, char* argv[]) const;

	enum class TestRole_t : int { TOKEN_GEN = 0, ROUTING_MASTER = 1, TABLE_RECEIVER = 2 };

	void printHost(const std::string& functionName) const;

	fhicl::ParameterSet const pset_;
	fhicl::ParameterSet const daq_pset_;
	std::string routing_master_address_;
	std::string multicast_address_;
	std::vector<int> eb_ranks_;
	size_t token_interval_us_;
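// For reference, a FHiCL configuration consistent with the parameter reads in the constructor below
// might look like the following sketch. Parameter names and defaults are taken from the code; the
// concrete values shown for receiver_ranks and run_number are illustrative only.
//
//   daq: {
//     routing_master_hostname: "localhost"
//     table_update_address: "227.128.12.28"
//     routing_token_port: 35555
//     table_update_port: 35556
//     table_acknowledge_port: 35557
//     policy: { receiver_ranks: [1, 2, 3, 4] }
//   }
//   token_count: 1000
//   token_interval_us: 5000
//   run_number: 101   # required; no default is provided in the code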
RoutingMasterTest::RoutingMasterTest(int argc, char* argv[]) :
	pset_(getPset(argc, argv)),
	daq_pset_(pset_.get<fhicl::ParameterSet>("daq")),
	routing_master_address_(daq_pset_.get<std::string>("routing_master_hostname", "localhost")),
	multicast_address_(daq_pset_.get<std::string>("table_update_address", "227.128.12.28")),
	token_port_(daq_pset_.get<int>("routing_token_port", 35555)),
	table_port_(daq_pset_.get<int>("table_update_port", 35556)),
	ack_port_(daq_pset_.get<int>("table_acknowledge_port", 35557)),
	token_count_(pset_.get<int>("token_count", 1000)),
	token_interval_us_(pset_.get<size_t>("token_interval_us", 5000)),
	run_number_(pset_.get<size_t>("run_number")) {
	assert(!(my_rank < 0));

	// Exactly one of these role assignments applies, depending on this process's rank:
	role_ = TestRole_t::TOKEN_GEN;
	role_ = TestRole_t::ROUTING_MASTER;
	role_ = TestRole_t::TABLE_RECEIVER;

	auto policy_pset = daq_pset_.get<fhicl::ParameterSet>("policy");
	eb_ranks_ = policy_pset.get<std::vector<int>>("receiver_ranks");
}
fhicl::ParameterSet RoutingMasterTest::getPset(int argc, char* argv[]) const {
	std::ostringstream descstr;
	descstr << "-- <-c <config-file>>";
	bpo::options_description desc(descstr.str());
	desc.add_options()("config,c", bpo::value<std::string>(), "Configuration file.");
	bpo::variables_map vm;
	try {
		bpo::store(bpo::command_line_parser(argc, argv).options(desc).allow_unregistered().run(), vm);
	}
	catch (bpo::error const& e) {
		std::cerr << "Exception from command line processing in RoutingMasterTest::getPset: " << e.what() << "\n";
		throw "cmdline parsing error.";
	}
	if (!vm.count("config")) {
		std::cerr << "Expected \"-- -c <config-file>\" fhicl file specification.\n";
		throw "cmdline parsing error.";
	}
	fhicl::ParameterSet pset;
	cet::filepath_lookup lookup_policy("FHICL_FILE_PATH");
	fhicl::make_ParameterSet(vm["config"].as<std::string>(), lookup_policy, pset);
	return pset;
}
void RoutingMasterTest::go() {
	TLOG(TLVL_INFO) << "Entering MPI_Barrier";
	MPI_Barrier(MPI_COMM_WORLD);
	TLOG(TLVL_INFO) << "Done with Barrier";

	// Dispatch to the routine for the role assigned in the constructor.
	switch (role_) {
		case TestRole_t::TABLE_RECEIVER:
			table_receiver();
			break;
		case TestRole_t::ROUTING_MASTER:
			routing_master();
			break;
		case TestRole_t::TOKEN_GEN:
			generate_tokens();
			break;
		default:
			throw "No such node type";
	}
	TLOG(TLVL_INFO) << "Rank " << my_rank << " complete.";
}
void RoutingMasterTest::generate_tokens() {
	TLOG(TLVL_INFO) << "generate_tokens(): Init";
	printHost("generate_tokens");

	int token_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (token_socket < 0) {
		TLOG(TLVL_ERROR) << "generate_tokens(): Failed to create the socket for sending Routing Tokens!";
	}
	struct sockaddr_in token_addr;
	auto sts = ResolveHost(routing_master_address_.c_str(), token_port_, token_addr);
	if (sts < 0) {
		TLOG(TLVL_ERROR) << "generate_tokens(): Could not resolve host name";
	}
	connect(token_socket, (struct sockaddr*)&token_addr, sizeof(token_addr));

	// Send token_count_ tokens, each granting one free slot to a randomly chosen receiver rank.
	std::map<int, int> token_counter;
	for (auto rank : eb_ranks_) {
		token_counter[rank] = 0;
	}
	int sent_tokens = 0;
	while (sent_tokens < token_count_) {
		int this_rank = eb_ranks_[seedAndRandom() % eb_ranks_.size()];
		token_counter[this_rank]++;
		artdaq::detail::RoutingToken token;
		token.header = TOKEN_MAGIC;
		token.rank = this_rank;
		token.new_slots_free = 1;
		token.run_number = run_number_;

		TLOG(TLVL_INFO) << "generate_tokens(): Sending RoutingToken " << ++sent_tokens << " for rank " << this_rank
		                << " to " << routing_master_address_;
		send(token_socket, &token, sizeof(artdaq::detail::RoutingToken), 0);
		usleep(token_interval_us_);
	}

	// Even out the totals: give each rank enough additional slots to match the rank that received the most tokens.
	int max_rank = 0;
	for (auto rank : token_counter) {
		if (rank.second > max_rank) max_rank = rank.second;
	}
	for (auto rank : token_counter) {
		artdaq::detail::RoutingToken token;
		token.header = TOKEN_MAGIC;
		token.rank = rank.first;
		token.new_slots_free = max_rank - rank.second;
		token.run_number = run_number_;

		TLOG(TLVL_INFO) << "generate_tokens(): Sending RoutingToken " << ++sent_tokens << " for rank " << rank.first
		                << " to " << routing_master_address_;
		send(token_socket, &token, sizeof(artdaq::detail::RoutingToken), 0);
		usleep(token_interval_us_);
	}

	TLOG(TLVL_INFO) << "generate_tokens(): Waiting at MPI_Barrier";
	MPI_Barrier(MPI_COMM_WORLD);
	TLOG(TLVL_INFO) << "generate_tokens(): Done with MPI_Barrier";
}
void RoutingMasterTest::table_receiver() {
	TLOG(TLVL_INFO) << "table_receiver(): Init";
	printHost("table_receiver");

	// Set up the UDP socket on which table updates from the Routing Master arrive.
	auto table_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
	if (table_socket < 0) {
		TLOG(TLVL_ERROR) << "table_receiver(): Error creating socket for receiving data requests!";
	}
	struct sockaddr_in si_me_request;

	int yes = 1;
	if (setsockopt(table_socket, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
		TLOG(TLVL_ERROR) << "table_receiver(): Unable to enable port reuse on request socket";
	}
	memset(&si_me_request, 0, sizeof(si_me_request));
	si_me_request.sin_family = AF_INET;
	si_me_request.sin_port = htons(table_port_);
	si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(table_socket, (struct sockaddr*)&si_me_request, sizeof(si_me_request)) == -1) {
		TLOG(TLVL_ERROR) << "table_receiver(): Cannot bind request socket to port " << table_port_;
	}

	// Join the table-update multicast group on all interfaces.
	struct ip_mreq mreq;
	long int sts = ResolveHost(multicast_address_.c_str(), mreq.imr_multiaddr);
	if (sts < 0) {
		TLOG(TLVL_ERROR) << "table_receiver(): Unable to resolve multicast hostname";
	}
	mreq.imr_interface.s_addr = htonl(INADDR_ANY);
	if (setsockopt(table_socket, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) {
		TLOG(TLVL_ERROR) << "table_receiver(): Unable to join multicast group";
	}

	struct epoll_event ev;
	int table_epoll_fd = epoll_create1(0);
	ev.events = EPOLLIN | EPOLLPRI;
	ev.data.fd = table_socket;
	if (epoll_ctl(table_epoll_fd, EPOLL_CTL_ADD, table_socket, &ev) == -1) {
		TLOG(TLVL_ERROR) << "table_receiver(): Could not register listen socket to epoll fd";
	}

	// UDP socket used to send acknowledgements back to the Routing Master.
	auto ack_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
	struct sockaddr_in ack_addr;
	sts = ResolveHost(routing_master_address_.c_str(), ack_port_, ack_addr);
	if (sts < 0) {
		TLOG(TLVL_ERROR) << "table_receiver(): Unable to resolve routing master hostname";
	}

	if (table_socket == -1 || table_epoll_fd == -1 || ack_socket == -1) {
		TLOG(TLVL_INFO) << "table_receiver(): One of the listen sockets was not opened successfully.";
	}

	artdaq::Fragment::sequence_id_t max_sequence_id = token_count_;
	artdaq::Fragment::sequence_id_t current_sequence_id = 0;
	std::map<artdaq::Fragment::sequence_id_t, int> routing_table;
	TLOG(TLVL_INFO) << "table_receiver(): Expecting " << max_sequence_id << " as the last Sequence ID in this run";
	while (current_sequence_id < max_sequence_id) {
		std::vector<epoll_event> table_events_(4);
		TLOG(TLVL_INFO) << "table_receiver(): Waiting for event on table socket";
		auto nfds = epoll_wait(table_epoll_fd, &table_events_[0], table_events_.size(), -1);
		if (nfds == -1) {
			perror("epoll_wait");
		}

		TLOG(TLVL_INFO) << "table_receiver(): Received " << nfds << " table update(s)";
		for (auto n = 0; n < nfds; ++n) {
			auto first = artdaq::Fragment::InvalidSequenceID;
			auto last = artdaq::Fragment::InvalidSequenceID;

			std::vector<uint8_t> buf(MAX_ROUTING_TABLE_SIZE);
			artdaq::detail::RoutingPacketHeader hdr;
			auto stss = recv(table_events_[n].data.fd, &buf[0], MAX_ROUTING_TABLE_SIZE, 0);

			if (stss > static_cast<ssize_t>(sizeof(hdr))) {
				memcpy(&hdr, &buf[0], sizeof(hdr));
			}
			else {
				TLOG(TLVL_TRACE) << __func__ << ": Incorrect size received. Discarding.";
				continue;
			}

			TLOG(TLVL_INFO) << "table_receiver(): Checking for valid header";
			if (hdr.header == ROUTING_MAGIC) {
				artdaq::detail::RoutingPacket buffer(hdr.nEntries);
				TLOG(TLVL_INFO) << "table_receiver(): Receiving data buffer";
				memcpy(&buffer[0], &buf[sizeof(artdaq::detail::RoutingPacketHeader)],
				       sizeof(artdaq::detail::RoutingPacketEntry) * hdr.nEntries);

				first = buffer[0].sequence_id;
				last = buffer[buffer.size() - 1].sequence_id;

				for (auto entry : buffer) {
					// Updates may be resent; a repeated sequence ID must map to the same destination rank.
					if (routing_table.count(entry.sequence_id)) {
						assert(routing_table[entry.sequence_id] == entry.destination_rank);
					}
					routing_table[entry.sequence_id] = entry.destination_rank;
					TLOG(TLVL_INFO) << "table_receiver(): table_receiver " << my_rank << ": received update: SeqID "
					                << entry.sequence_id << " -> Rank " << entry.destination_rank;
				}

				// Acknowledge the received range back to the Routing Master.
				artdaq::detail::RoutingAckPacket ack;
				ack.first_sequence_id = first;
				ack.last_sequence_id = last;

				TLOG(TLVL_INFO) << "table_receiver(): Sending RoutingAckPacket with first= " << first << " and last= " << last
				                << " to " << routing_master_address_ << ", port " << ack_port_;
				sendto(ack_socket, &ack, sizeof(artdaq::detail::RoutingAckPacket), 0, (struct sockaddr*)&ack_addr,
				       sizeof(ack_addr));
				current_sequence_id = last;
			}
		}
	}

	TLOG(TLVL_INFO) << "table_receiver(): Waiting at MPI_Barrier";
	MPI_Barrier(MPI_COMM_WORLD);
	TLOG(TLVL_INFO) << "table_receiver(): Done with MPI_Barrier";
}
void RoutingMasterTest::routing_master() {
	TLOG(TLVL_INFO) << "routing_master: Init";
	printHost("routing_master");

	app_name = "RoutingMaster";

	auto app = std::make_unique<artdaq::RoutingMasterApp>();

	auto sts = app->initialize(pset_, 0, 0);
	if (!sts) {
		TLOG(TLVL_ERROR) << "routing_master: Failed to initialize!";
	}
	app->do_start(art::RunID(run_number_), 0, 0);
	TLOG(TLVL_INFO) << "routing_master: Waiting at MPI_Barrier";
	MPI_Barrier(MPI_COMM_WORLD);
	TLOG(TLVL_INFO) << "routing_master: Done with MPI_Barrier, calling RoutingMasterCore::stop";
	app->do_stop(0, 0);
	TLOG(TLVL_INFO) << "routing_master: Done with RoutingMasterCore::stop, calling shutdown";
	app->do_shutdown(0);
	TLOG(TLVL_INFO) << "routing_master: Done with RoutingMasterCore::shutdown";
}
void RoutingMasterTest::printHost(const std::string& functionName) const {
	char* doPrint = getenv("PRINT_HOST");
	if (doPrint == nullptr) {
		return;
	}
	const int ARRSIZE = 80;
	char hostname[ARRSIZE];
	std::string hostString;
	if (!gethostname(hostname, ARRSIZE)) {
		hostString = hostname;
	}
	else {
		hostString = "unknown";
	}
	TLOG(TLVL_INFO) << "Running " << functionName << " on host " << hostString << " with rank " << my_rank << ".";
}
	getrusage(RUSAGE_SELF, &usage);
	std::cout << myid << ":"
	          << " user=" << artdaq::TimeUtils::convertUnixTimeToSeconds(usage.ru_utime)
	          << " sys=" << artdaq::TimeUtils::convertUnixTimeToSeconds(usage.ru_stime) << std::endl;
int main(int argc, char* argv[]) {
	artdaq::configureMessageFacility("routing_master", true);

	std::cerr << "PID: " << getpid() << std::endl;
	volatile bool attach = true;

	try {
		std::cerr << "Started process " << my_rank << " of " << p.procs_ << ".\n";

		RoutingMasterTest rmt(argc, argv);
		rmt.go();
	}
	catch (std::string& x) {
		std::cerr << "Exception (type string) caught in routing_master: " << x << '\n';
	}
	catch (char const* m) {
		std::cerr << "Exception (type char const*) caught in routing_master: ";
		if (m != nullptr) {
			std::cerr << m;
		}
		else {
			std::cerr << "[the value was a null pointer, so no message is available]";
		}
		std::cerr << '\n';
	}
	return 0;
}
// Class and method summary:
//   RoutingMasterTest: a wrapper for an MPI program, similar to MPISentry; it runs the routing_master test.
//   RoutingMasterTest(int argc, char* argv[]): Constructor.
//   fhicl::ParameterSet getPset(int argc, char* argv[]) const: Parse the command-line arguments and load a configuration FHiCL file.
//   void go(): Start the test, using the role assigned.
//   void generate_tokens(): Generate tokens and send them to the Routing Master.
//   void routing_master(): Load a RoutingMasterCore instance, receive tokens from the token generators, and send table updates to the table receivers.
//   void table_receiver(): Receive Routing Tables from the Routing Master and send acknowledgement packets back.
// The message flow these methods exercise is summarized below.
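// Message flow exercised by this test, as implemented above (a summary of the code, not additional behavior):
//   1. Token generators send artdaq::detail::RoutingToken structs over TCP to the Routing Master's
//      routing_token_port, each token granting new_slots_free slots to one receiver rank.
//   2. The Routing Master multicasts table updates (a RoutingPacketHeader followed by RoutingPacketEntry
//      records mapping sequence IDs to destination ranks) over UDP to table_update_address:table_update_port.
//   3. Each table receiver replies with a RoutingAckPacket over UDP to table_acknowledge_port, acknowledging
//      the first and last sequence IDs of the update it processed.
// All roles then synchronize at an MPI_Barrier before shutdown.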