1 #define TRACE_NAME "RoutingReceiver"
2 #include "artdaq/DAQdata/Globals.hh"
5 #include "canvas/Utilities/Exception.h"
7 #include <netinet/in.h>
12 #include <sys/socket.h>
14 #include "artdaq/Application/LoadParameterSet.hh"
15 #include "artdaq/DAQrate/detail/RoutingPacket.hh"
16 #include "artdaq/TransferPlugins/detail/HostMap.hh"
17 #include "proto/artdaqapp.hh"
18 #include "fhiclcpp/types/Atom.h"
19 #include "fhiclcpp/types/OptionalTable.h"
20 #include "fhiclcpp/types/TableFragment.h"
33 fhicl::Atom<size_t>
collection_time_ms{ fhicl::Name{
"collection_time_ms" }, fhicl::Comment{
"Time to collect routing table updates between printing summaries" }, 1000 };
35 fhicl::Atom<bool>
print_verbose_info{ fhicl::Name{
"print_verbose_info" }, fhicl::Comment{
"Print verbose information about each receiver detected in routing tables" },
true };
37 fhicl::Atom<size_t>
graph_width{ fhicl::Name{
"graph_width" }, fhicl::Comment{
"Width of the summary graph" }, 40 };
50 , routing_table_last_(0)
52 TLOG(TLVL_DEBUG) <<
"Received pset: " << pset.to_string();
56 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_table_config", fhicl::ParameterSet());
57 use_routing_master_ = rmConfig.get<
bool>(
"use_routing_master",
false);
58 table_port_ = rmConfig.get<
int>(
"table_update_port", 35556);
59 table_address_ = rmConfig.get<std::string>(
"table_update_address",
"227.128.12.28");
63 if (use_routing_master_) startTableReceiverThread_();
71 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver BEGIN";
73 if (routing_thread_.joinable()) routing_thread_.join();
74 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver END.";
83 std::unique_lock<std::mutex> lk(routing_mutex_);
84 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
85 return routing_table_copy;
94 std::unique_lock<std::mutex> lk(routing_mutex_);
95 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
96 routing_table_.clear();
97 return routing_table_copy;
107 void setupTableListener_()
110 table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
111 if (table_socket_ < 0)
113 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving table updates!";
117 struct sockaddr_in si_me_request;
120 if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
122 TLOG(TLVL_ERROR) <<
" Unable to enable port reuse on request socket";
125 memset(&si_me_request, 0,
sizeof(si_me_request));
126 si_me_request.sin_family = AF_INET;
127 si_me_request.sin_port = htons(table_port_);
// Value-initialise the address: inet_aton() is not guaranteed to write its
// output when the string is invalid, and the value is copied into
// si_me_request below. Starting from zero makes that copy INADDR_ANY
// instead of an indeterminate value if the failure path continues.
struct in_addr in_addr_s = {};
130 sts = inet_aton(table_address_.c_str(), &in_addr_s);
133 TLOG(TLVL_ERROR) <<
"inet_aton says table_address " << table_address_ <<
" is invalid";
135 si_me_request.sin_addr.s_addr = in_addr_s.s_addr;
136 if (bind(table_socket_, (
struct sockaddr *)&si_me_request,
sizeof(si_me_request)) == -1)
138 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << table_port_;
143 sts =
ResolveHost(table_address_.c_str(), mreq.imr_multiaddr);
146 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast address for table updates";
149 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
150 if (setsockopt(table_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
152 TLOG(TLVL_ERROR) <<
"Unable to join multicast group";
156 void startTableReceiverThread_()
158 if (routing_thread_.joinable()) routing_thread_.join();
159 TLOG(TLVL_INFO) <<
"Starting Routing Thread";
161 routing_thread_ = boost::thread(&RoutingReceiver::receiveTableUpdatesLoop_,
this);
163 catch (
const boost::exception& e)
165 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
166 std::cerr <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
170 void receiveTableUpdatesLoop_()
176 TLOG(TLVL_DEBUG) << __func__ <<
": should_stop is " << std::boolalpha << should_stop_ <<
", stopping";
180 TLOG(TLVL_TRACE) << __func__ <<
": Polling table socket for new routes";
181 if (table_socket_ == -1)
183 TLOG(TLVL_DEBUG) << __func__ <<
": Opening table listener socket";
184 setupTableListener_();
186 if (table_socket_ == -1)
188 TLOG(TLVL_DEBUG) << __func__ <<
": The listen socket was not opened successfully.";
193 fd.fd = table_socket_;
194 fd.events = POLLIN | POLLPRI;
196 auto res = poll(&fd, 1, 1000);
199 auto first = artdaq::Fragment::InvalidSequenceID;
200 auto last = artdaq::Fragment::InvalidSequenceID;
201 std::vector<uint8_t> buf(MAX_ROUTING_TABLE_SIZE);
204 TLOG(TLVL_DEBUG) << __func__ <<
": Going to receive RoutingPacketHeader";
205 struct sockaddr_in from;
206 socklen_t len =
sizeof(from);
207 auto stss = recvfrom(table_socket_, &buf[0], MAX_ROUTING_TABLE_SIZE, 0, (
struct sockaddr*)&from, &len);
208 TLOG(TLVL_DEBUG) << __func__ <<
": Received " << stss <<
" bytes from " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port;
210 if (stss > static_cast<ssize_t>(
sizeof(hdr)))
216 TLOG(TLVL_TRACE) << __func__ <<
": Incorrect size received. Discarding.";
220 TRACE(TLVL_DEBUG,
"receiveTableUpdatesLoop_: Checking for valid header with nEntries=%lu headerData:0x%016lx%016lx", hdr.
nEntries, ((
unsigned long*)&hdr)[0], ((
unsigned long*)&hdr)[1]);
221 if (hdr.
header != ROUTING_MAGIC)
223 TLOG(TLVL_TRACE) << __func__ <<
": non-RoutingPacket received. No ROUTING_MAGIC. size(bytes)=" << stss;
230 TRACE(6,
"receiveTableUpdatesLoop_: Received a packet of %ld bytes. 1st 16 bytes: 0x%016lx%016lx", stss, ((
unsigned long*)&buffer[0])[0], ((
unsigned long*)&buffer[0])[1]);
232 first = buffer[0].sequence_id;
233 last = buffer[buffer.size() - 1].sequence_id;
235 if (first + hdr.
nEntries - 1 != last)
237 TLOG(TLVL_ERROR) << __func__ <<
": Skipping this RoutingPacket because the first (" << first <<
") and last (" << last <<
") entries are inconsistent (sz=" << hdr.
nEntries <<
")!";
240 auto thisSeqID = first;
243 std::unique_lock<std::mutex> lck(routing_mutex_);
244 if (routing_table_.count(last) == 0)
246 for (
auto entry : buffer)
248 if (thisSeqID != entry.sequence_id)
250 TLOG(TLVL_ERROR) << __func__ <<
": Aborting processing of this RoutingPacket because I encountered an inconsistent entry (seqid=" << entry.sequence_id <<
", expected=" << thisSeqID <<
")!";
251 last = thisSeqID - 1;
255 if (routing_table_.count(entry.sequence_id))
257 if (routing_table_[entry.sequence_id] != entry.destination_rank)
259 TLOG(TLVL_ERROR) << __func__ <<
": Detected routing table corruption! Recevied update specifying that sequence ID " << entry.sequence_id
260 <<
" should go to rank " << entry.destination_rank <<
", but I had already been told to send it to " << routing_table_[entry.sequence_id] <<
"!"
261 <<
" I will use the original value!";
265 if (entry.sequence_id < routing_table_last_)
continue;
266 routing_table_[entry.sequence_id] = entry.destination_rank;
267 TLOG(TLVL_DEBUG) << __func__ <<
": (my_rank=" << my_rank <<
") received update: SeqID " << entry.sequence_id
268 <<
" -> Rank " << entry.destination_rank;
272 TLOG(TLVL_DEBUG) << __func__ <<
": There are now " << routing_table_.size() <<
" entries in the Routing Table";
273 if (routing_table_.size() > 0) TLOG(TLVL_DEBUG) << __func__ <<
": Last routing table entry is seqID=" << routing_table_.rbegin()->first;
276 for (
auto& entry : routing_table_)
278 TLOG(45) <<
"Routing Table Entry" << counter <<
": " << entry.first <<
" -> " << entry.second;
283 if (last > routing_table_last_) routing_table_last_ = last;
290 bool use_routing_master_;
291 std::atomic<bool> should_stop_;
293 std::string table_address_;
295 std::map<Fragment::sequence_id_t, int> routing_table_;
296 Fragment::sequence_id_t routing_table_last_;
297 mutable std::mutex routing_mutex_;
298 boost::thread routing_thread_;
304 static bool sighandler_init =
false;
305 static bool should_stop =
false;
306 static void signal_handler(
int signum)
309 TRACE_STREAMER(TLVL_ERROR, &(
"routingReceiver")[0], 0, 0, 0) <<
"A signal of type " << signum <<
" was caught by routingReceiver. Stopping receive loop!";
314 pthread_sigmask(SIG_UNBLOCK, NULL, &set);
315 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
319 int main(
int argc,
char* argv[])
321 artdaq::configureMessageFacility(
"RoutingReceiver",
false,
false);
322 static std::mutex sighandler_mutex;
323 std::unique_lock<std::mutex> lk(sighandler_mutex);
325 if (!sighandler_init)
327 sighandler_init =
true;
328 std::vector<int> signals = { SIGINT, SIGTERM, SIGUSR1, SIGUSR2 };
329 for (
auto signal : signals)
331 struct sigaction old_action;
332 sigaction(signal, NULL, &old_action);
336 if (old_action.sa_handler != SIG_IGN)
338 struct sigaction action;
339 action.sa_handler = signal_handler;
340 sigemptyset(&action.sa_mask);
341 for (
auto sigblk : signals)
343 sigaddset(&action.sa_mask, sigblk);
348 sigaction(signal, &action, NULL);
353 fhicl::ParameterSet init_ps = LoadParameterSet<artdaq::RoutingReceiver::Config>(argc, argv,
"routingReceiver",
"This application receives Routing Tables, and calculates statistics about the usage of the receivers");
354 fhicl::ParameterSet config_ps = init_ps.get<fhicl::ParameterSet>(
"daq", init_ps);
355 fhicl::ParameterSet metric_ps = config_ps.get<fhicl::ParameterSet>(
"metrics", config_ps);
356 fhicl::ParameterSet fr_ps = config_ps.get<fhicl::ParameterSet>(
"fragment_receiver", config_ps);
360 auto host_map = rr.GetHostMap();
362 size_t collection_time_ms = init_ps.get<
size_t>(
"collection_time_ms", 1000);
363 size_t max_graph_width = init_ps.get<
size_t>(
"max_graph_width", 100);
364 bool print_verbose = init_ps.get<
bool>(
"print_verbose_info",
true);
365 bool verbose_clear_screen = init_ps.get<
bool>(
"clear_screen",
true);
367 auto blue =
"\033[34m";
368 auto cyan =
"\033[36m";
369 auto green =
"\033[32m";
370 auto yellow =
"\033[93m";
371 auto red =
"\033[31m";
373 metricMan->initialize(metric_ps,
"RoutingReceiver");
374 metricMan->do_start();
375 if (print_verbose && verbose_clear_screen) std::cout <<
"\033[2J";
377 std::map<int, int> receiver_table = std::map<int,int>();
381 auto start_time = std::chrono::steady_clock::now();
383 auto this_table = rr.GetAndClearRoutingTable();
385 if (this_table.size() > 0)
387 auto graph_width = this_table.size();
389 auto graph_width_orig = graph_width;
390 while (graph_width > max_graph_width)
393 graph_width = graph_width_orig / n;
396 for (
auto& entry : this_table)
398 receiver_table[entry.second]++;
401 auto average_entries_per_receiver = this_table.size() / receiver_table.size();
404 auto cyan_threshold = ((average_entries_per_receiver - offset) / 2) / n;
405 auto green_threshold = (average_entries_per_receiver - offset) / n;
406 auto yellow_threshold = (average_entries_per_receiver + offset) / n;
407 auto red_threshold = (2 * average_entries_per_receiver) / n;
409 TLOG(TLVL_TRACE) <<
"CT: " << cyan_threshold <<
", GT: " << green_threshold <<
", YT: " << yellow_threshold <<
", RT: " << red_threshold;
411 std::ostringstream report;
412 std::ostringstream verbose_report;
414 if (print_verbose && verbose_clear_screen) std::cout <<
"\033[;H\033[J";
416 report << artdaq::TimeUtils::gettimeofday_us() <<
": " << this_table.size() <<
" Entries, ";
417 for (
auto& receiver : receiver_table)
419 auto percent =
static_cast<int>(receiver.second * 100 / this_table.size());
420 report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%), ";
423 verbose_report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%)\t[";
425 size_t graph_characters = receiver.second / n;
427 for (
size_t ii = 0; ii < graph_characters; ++ii)
429 if (ii < cyan_threshold)
431 verbose_report << blue;
433 else if (ii < green_threshold)
435 verbose_report << cyan;
437 else if (ii < yellow_threshold)
439 verbose_report << green;
441 else if (ii < red_threshold)
443 verbose_report << yellow;
447 verbose_report << red;
449 verbose_report <<
"|";
451 std::string spaces = std::string(graph_width - graph_characters,
' ');
452 verbose_report <<
"\033[0m" << spaces <<
"]" << std::endl;
456 TLOG(TLVL_INFO) << report.str();
457 std::cout << report.str() << std::endl;
458 if(print_verbose) std::cout << verbose_report.str() << std::endl;
460 std::this_thread::sleep_until(start_time + std::chrono::milliseconds(collection_time_ms));
463 metricMan->do_stop();
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
A row of the Routing Table.
~RoutingReceiver()
RoutingReceiver Destructor.
hostMap_t MakeHostMap(fhicl::ParameterSet pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
fhicl::Atom< bool > print_verbose_info
"print_verbose_info" (Default: true): Print verbose information about each receiver detected in routing tables
static void CleanUpGlobals()
Clean up statically-allocated Manager class instances.
fhicl::Atom< size_t > collection_time_ms
"collection_time_ms" (Default: 1000): Time to collect routing table updates between printing summaries
RoutingReceiver(fhicl::ParameterSet const &pset)
RoutingReceiver Constructor.
fhicl::TableFragment< artdaq::artdaqapp::Config > artdaqAppConfig
Configuration for artdaq Application (BoardReader, etc)
Accepted configuration parameters for RoutingReceiver.
std::map< Fragment::sequence_id_t, int > GetRoutingTable()
Get the current routing table.
fhicl::WrappedTable< Config > Parameters
Used for ParameterSet validation (if desired)
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
Class which receives routing tables and prints updates.
hostMap_t GetHostMap()
Get the host map.
std::map< Fragment::sequence_id_t, int > GetAndClearRoutingTable()
Get the current routing table, additionally clearing all entries.
std::map< int, std::string > hostMap_t
The host_map is a map associating ranks with artdaq::DestinationInfo objects.
fhicl::Atom< size_t > graph_width
"graph_width": Width of the summary graph