1 #define TRACE_NAME "RoutingReceiver"
2 #include "artdaq/DAQdata/Globals.hh"
5 #include <netinet/in.h>
7 #include <sys/socket.h>
12 #include "artdaq/Application/LoadParameterSet.hh"
14 #include "artdaq/DAQrate/detail/RoutingPacket.hh"
15 #include "artdaq/TransferPlugins/detail/HostMap.hh"
16 #include "canvas/Utilities/Exception.h"
17 #include "fhiclcpp/types/Atom.h"
18 #include "fhiclcpp/types/OptionalTable.h"
19 #include "fhiclcpp/types/TableFragment.h"
20 #include "proto/artdaqapp.hh"
// Configuration atoms. Each fhicl::Atom below is constructed from a parameter
// name, a descriptive comment, and a default value.
// Time between printed summaries, in milliseconds (default 1000).
33 fhicl::Atom<size_t>
collection_time_ms{fhicl::Name{
"collection_time_ms"}, fhicl::Comment{
"Time to collect routing table updates between printing summaries"}, 1000};
// Whether per-receiver detail is printed (default true).
35 fhicl::Atom<bool>
print_verbose_info{fhicl::Name{
"print_verbose_info"}, fhicl::Comment{
"Print verbose information about each receiver detected in routing tables"},
true};
// Width of the summary graph (default 40).
// NOTE(review): main() reads "max_graph_width" (default 100) and
// "clear_screen" from the parameter set; neither name is declared among the
// atoms visible here, and "graph_width" is not read by the visible part of
// main() -- confirm whether the names are meant to differ.
37 fhicl::Atom<size_t>
graph_width{fhicl::Name{
"graph_width"}, fhicl::Comment{
"Width of the summary graph"}, 40};
50 , routing_table_last_(0)
// Constructor body (the signature and earlier initializers are not visible in
// this listing). Logs the complete parameter set that was passed in.
52 TLOG(TLVL_DEBUG) <<
"Received pset: " << pset.to_string();
// Routing settings live in the "routing_table_config" sub-table; an empty
// ParameterSet is used when it is absent, so every value below falls back to
// its default.
56 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_table_config", fhicl::ParameterSet());
// Receiving is opt-in: "use_routing_manager" defaults to false.
57 use_routing_manager_ = rmConfig.get<
bool>(
"use_routing_manager",
false);
// Port for table updates (default 35556).
58 table_port_ = rmConfig.get<
int>(
"table_update_port", 35556);
// Address for table updates (default 227.128.12.28); it is later passed to
// inet_aton and used for IP_ADD_MEMBERSHIP in setupTableListener_().
59 table_address_ = rmConfig.get<std::string>(
"table_update_address",
"227.128.12.28");
// The receive thread is only started when routing is enabled.
63 if (use_routing_manager_)
65 startTableReceiverThread_();
// Destructor body: logs the start of shutdown, waits for the routing thread
// if it is joinable, then logs completion.
// NOTE(review): the lines between the first log and the join are not visible
// in this listing; presumably should_stop_ is set there so the loop can exit
// -- confirm against the full source.
74 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver BEGIN";
78 if (routing_thread_.joinable())
80 routing_thread_.join();
87 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver END.";
// Body of GetRoutingTable(): takes routing_mutex_ and returns a copy of the
// current table, so the caller never holds a reference to routing_table_.
96 std::unique_lock<std::mutex> lk(routing_mutex_);
97 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
98 return routing_table_copy;
// Body of GetAndClearRoutingTable(): under routing_mutex_, copies the table,
// empties the stored one, and returns the copy. routing_table_last_ is not
// touched here.
107 std::unique_lock<std::mutex> lk(routing_mutex_);
108 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
// Clearing happens while the lock is still held, so no update can land
// between the copy and the clear.
109 routing_table_.clear();
110 return routing_table_copy;
// Creates the UDP socket used to receive routing table updates, enables
// address reuse, binds it to table_address_:table_port_, and joins the
// multicast group given by table_address_.
// NOTE(review): what happens after each error log (return, exit, ...) is not
// visible in this listing, nor are the declarations of `yes`, `sts` and
// `mreq` -- confirm against the full source.
125 void setupTableListener_()
128 table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
129 if (table_socket_ < 0)
131 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving table updates!";
135 struct sockaddr_in si_me_request;
// SO_REUSEADDR is set before bind().
138 if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
140 TLOG(TLVL_ERROR) <<
" Unable to enable port reuse on request socket";
// Zero the address structure before filling in family, port and address.
143 memset(&si_me_request, 0,
sizeof(si_me_request));
144 si_me_request.sin_family = AF_INET;
145 si_me_request.sin_port = htons(table_port_);
// table_address_ must be a dotted-quad literal here: inet_aton does not
// perform name resolution.
147 struct in_addr in_addr_s;
148 sts = inet_aton(table_address_.c_str(), &in_addr_s);
151 TLOG(TLVL_ERROR) <<
"inet_aton says table_address " << table_address_ <<
" is invalid";
// The socket is bound to the configured address itself rather than to
// INADDR_ANY.
153 si_me_request.sin_addr.s_addr = in_addr_s.s_addr;
154 if (bind(table_socket_, reinterpret_cast<struct sockaddr*>(&si_me_request),
sizeof(si_me_request)) == -1)
156 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << table_port_;
// Fill in the group address for the membership request via ResolveHost.
161 sts =
ResolveHost(table_address_.c_str(), mreq.imr_multiaddr);
164 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast address for table updates";
// INADDR_ANY leaves the choice of local interface to the system.
167 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
168 if (setsockopt(table_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
170 TLOG(TLVL_ERROR) <<
"Unable to join multicast group";
// Starts (or restarts) the thread that runs receiveTableUpdatesLoop_().
// A boost::exception raised while starting is reported to both the TRACE log
// and std::cerr, together with the current errno.
// NOTE(review): the `try` and whatever follows the two reports (rethrow,
// exit, ...) are not visible in this listing.
174 void startTableReceiverThread_()
// Join any previous instance first; this blocks until that thread has ended.
176 if (routing_thread_.joinable())
178 routing_thread_.join();
180 TLOG(TLVL_INFO) <<
"Starting Routing Thread";
// The thread runs a member function on this object, so the object must
// outlive the thread (the destructor joins it).
183 routing_thread_ = boost::thread(&RoutingReceiver::receiveTableUpdatesLoop_,
this);
185 catch (
const boost::exception& e)
187 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
188 std::cerr <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
// Thread body: polls table_socket_ for routing packets, validates each one,
// and merges its entries into routing_table_ under routing_mutex_.
// NOTE(review): the loop construct, several conditions (stop check, poll
// result check, else branches) and the declarations of `fd`, `hdr`, `buffer`
// and `counter` are not visible in this listing; comments below describe only
// the visible statements.
192 void receiveTableUpdatesLoop_()
// Logged on the stop path, together with the current should_stop_ value.
198 TLOG(TLVL_DEBUG) << __func__ <<
": should_stop is " << std::boolalpha << should_stop_ <<
", stopping";
202 TLOG(TLVL_TRACE) << __func__ <<
": Polling table socket for new routes";
// The listener socket is opened here when table_socket_ is still -1.
203 if (table_socket_ == -1)
205 TLOG(TLVL_DEBUG) << __func__ <<
": Opening table listener socket";
206 setupTableListener_();
// Still -1 after the attempt means the socket could not be opened.
208 if (table_socket_ == -1)
210 TLOG(TLVL_DEBUG) << __func__ <<
": The listen socket was not opened successfully.";
// Wait for readable (normal or priority) data, at most 1000 ms per poll.
215 fd.fd = table_socket_;
216 fd.events = POLLIN | POLLPRI;
218 auto res = poll(&fd, 1, 1000);
221 auto first = artdaq::Fragment::InvalidSequenceID;
222 auto last = artdaq::Fragment::InvalidSequenceID;
// Receive buffer sized to the largest datagram the loop will accept.
223 std::vector<uint8_t> buf(MAX_ROUTING_TABLE_SIZE);
226 TLOG(TLVL_DEBUG) << __func__ <<
": Going to receive RoutingPacketHeader";
227 struct sockaddr_in from;
228 socklen_t len =
sizeof(from);
229 auto stss = recvfrom(table_socket_, &buf[0], MAX_ROUTING_TABLE_SIZE, 0, reinterpret_cast<struct sockaddr*>(&from), &len);
// NOTE(review): from.sin_port is streamed without ntohs(), so the port
// appears in network byte order; stss may also be -1 here if recvfrom failed.
230 TLOG(TLVL_DEBUG) << __func__ <<
": Received " << stss <<
" bytes from " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port;
// Strictly greater than: a datagram of exactly header size (no entries) does
// not satisfy this condition.
232 if (stss > static_cast<ssize_t>(
sizeof(hdr)))
238 TLOG(TLVL_TRACE) << __func__ <<
": Incorrect size received. Discarding.";
// Dumps the first two unsigned-long words of the header for debugging.
242 TRACE(TLVL_DEBUG,
"receiveTableUpdatesLoop_: Checking for valid header with nEntries=%lu headerData:0x%016lx%016lx", hdr.
nEntries, ((
unsigned long*)&hdr)[0], ((
unsigned long*)&hdr)[1]);
// Packets whose header field does not equal ROUTING_MAGIC are treated as
// foreign traffic.
243 if (hdr.
header != ROUTING_MAGIC)
245 TLOG(TLVL_TRACE) << __func__ <<
": non-RoutingPacket received. No ROUTING_MAGIC. size(bytes)=" << stss;
252 TRACE(6,
"receiveTableUpdatesLoop_: Received a packet of %ld bytes. 1st 16 bytes: 0x%016lx%016lx", stss, ((
unsigned long*)&buffer[0])[0], ((
unsigned long*)&buffer[0])[1]);
// NOTE(review): buffer[0] and buffer[size()-1] require a non-empty buffer;
// how `buffer` is sized is not visible here.
254 first = buffer[0].sequence_id;
255 last = buffer[buffer.size() - 1].sequence_id;
// The packet must cover a contiguous run of nEntries sequence IDs.
257 if (first + hdr.
nEntries - 1 != last)
259 TLOG(TLVL_ERROR) << __func__ <<
": Skipping this RoutingPacket because the first (" << first <<
") and last (" << last <<
") entries are inconsistent (sz=" << hdr.
nEntries <<
")!";
// Expected sequence ID of the next entry while walking the packet.
262 auto thisSeqID = first;
// All routing_table_ access below happens with routing_mutex_ held.
265 std::unique_lock<std::mutex> lck(routing_mutex_);
// If the packet's last sequence ID is already in the table, the entries are
// not processed again.
266 if (routing_table_.count(last) == 0)
// NOTE(review): iterating by value copies each entry.
268 for (
auto entry : buffer)
// An out-of-order entry ends processing; `last` is pulled back to the final
// entry that matched the expected sequence.
270 if (thisSeqID != entry.sequence_id)
272 TLOG(TLVL_ERROR) << __func__ <<
": Aborting processing of this RoutingPacket because I encountered an inconsistent entry (seqid=" << entry.sequence_id <<
", expected=" << thisSeqID <<
")!";
273 last = thisSeqID - 1;
// An existing entry is reported when the incoming rank differs; per the log
// message, the original value is the one kept.
277 if (routing_table_.count(entry.sequence_id) != 0u)
279 if (routing_table_[entry.sequence_id] != entry.destination_rank)
281 TLOG(TLVL_ERROR) << __func__ <<
": Detected routing table corruption! Recevied update specifying that sequence ID " << entry.sequence_id
282 <<
" should go to rank " << entry.destination_rank <<
", but I had already been told to send it to " << routing_table_[entry.sequence_id] <<
"!"
283 <<
" I will use the original value!";
// Entries older than the highest sequence ID already seen get special
// handling; the action taken is not visible in this listing.
287 if (entry.sequence_id < routing_table_last_)
291 routing_table_[entry.sequence_id] = entry.destination_rank;
292 TLOG(TLVL_DEBUG) << __func__ <<
": (my_rank=" << my_rank <<
") received update: SeqID " << entry.sequence_id
293 <<
" -> Rank " << entry.destination_rank;
297 TLOG(TLVL_DEBUG) << __func__ <<
": There are now " << routing_table_.size() <<
" entries in the Routing Table";
// rbegin() is only dereferenced when the table is non-empty.
298 if (!routing_table_.empty())
300 TLOG(TLVL_DEBUG) << __func__ <<
": Last routing table entry is seqID=" << routing_table_.rbegin()->first;
// Full table dump at trace level 45.
304 for (
auto& entry : routing_table_)
306 TLOG(45) <<
"Routing Table Entry" << counter <<
": " << entry.first <<
" -> " << entry.second;
// routing_table_last_ only ever moves forward.
311 if (last > routing_table_last_)
313 routing_table_last_ = last;
// Data members visible in this listing (table_port_ and table_socket_ are
// used by the methods, but their declarations are not shown here).
// Set from "use_routing_manager"; gates starting the receive thread.
321 bool use_routing_manager_;
// Atomic flag whose value the receive loop logs when it stops.
322 std::atomic<bool> should_stop_;
// Address used for bind() and for the multicast group membership.
324 std::string table_address_;
// Sequence ID -> destination rank; accessed under routing_mutex_.
326 std::map<Fragment::sequence_id_t, int> routing_table_;
// Highest `last` sequence ID recorded by the receive loop; it is not reset
// when the table is cleared.
327 Fragment::sequence_id_t routing_table_last_;
// Guards routing_table_; mutable so it can be locked from const methods.
328 mutable std::mutex routing_mutex_;
// Thread running receiveTableUpdatesLoop_().
329 boost::thread routing_thread_;
// File-scope state for signal handling in main().
// True once main() has installed the handlers.
334 static bool sighandler_init =
false;
// NOTE(review): a plain bool; if it is written from signal_handler and read
// elsewhere, volatile std::sig_atomic_t or std::atomic<bool> would be the
// safe choice -- the write itself is not visible in this listing.
335 static bool should_stop =
false;
// Handler installed by main() for SIGINT, SIGTERM, SIGUSR1 and SIGUSR2.
// Logs the signal number, then unblocks the signals that are currently
// blocked for this thread.
// NOTE(review): the declaration of `set` and any lines between the log and
// the mask calls are not visible in this listing.
336 static void signal_handler(
int signum)
339 TRACE_STREAMER(TLVL_ERROR, &(
"routingReceiver")[0], 0, 0, 0) <<
"A signal of type " << signum <<
" was caught by routingReceiver. Stopping receive loop!";
// With a null new-mask argument this call only reads the current mask into
// `set`.
344 pthread_sigmask(SIG_UNBLOCK,
nullptr, &set);
// Unblock every signal that was in that mask.
345 pthread_sigmask(SIG_UNBLOCK, &set,
nullptr);
// Entry point: installs signal handlers, loads the configuration, then
// repeatedly drains the routing table and prints a per-receiver usage summary
// (optionally with a coloured bar graph).
// NOTE(review): the loop construct, the construction of `rr`, and the
// declarations of `n`, `offset` and `set` are not visible in this listing;
// comments below describe only the visible statements.
348 int main(
int argc,
char* argv[])
351 artdaq::configureMessageFacility(
"RoutingReceiver",
false,
false);
// Handler installation is serialised by a function-local mutex and guarded
// by sighandler_init so it happens once.
352 static std::mutex sighandler_mutex;
353 std::unique_lock<std::mutex> lk(sighandler_mutex);
355 if (!sighandler_init)
357 sighandler_init =
true;
358 std::vector<int> signals = {SIGINT, SIGTERM, SIGUSR1, SIGUSR2};
359 for (
auto signal : signals)
// Query the current disposition without changing it.
361 struct sigaction old_action;
362 sigaction(signal,
nullptr, &old_action);
// Signals that are currently ignored are left ignored.
366 if (old_action.sa_handler != SIG_IGN)
368 struct sigaction action;
369 action.sa_handler = signal_handler;
// All four handled signals are blocked while a handler runs.
// NOTE(review): `action` is not zero-initialised in the visible lines;
// sa_flags must be set somewhere before the sigaction call -- confirm.
370 sigemptyset(&action.sa_mask);
371 for (
auto sigblk : signals)
373 sigaddset(&action.sa_mask, sigblk);
378 sigaction(signal, &action,
nullptr);
// Load the configuration from the command line.
383 fhicl::ParameterSet init_ps = LoadParameterSet<artdaq::RoutingReceiver::Config>(argc, argv,
"routingReceiver",
"This application receives Routing Tables, and calculates statistics about the usage of the receivers");
// Each nested table falls back to its parent when the key is absent.
384 auto config_ps = init_ps.get<fhicl::ParameterSet>(
"daq", init_ps);
385 auto metric_ps = config_ps.get<fhicl::ParameterSet>(
"metrics", config_ps);
386 auto fr_ps = config_ps.get<fhicl::ParameterSet>(
"fragment_receiver", config_ps);
390 auto host_map = rr.GetHostMap();
// Display options are read from the top-level parameter set.
392 auto collection_time_ms = init_ps.get<
size_t>(
"collection_time_ms", 1000);
393 auto max_graph_width = init_ps.get<
size_t>(
"max_graph_width", 100);
394 bool print_verbose = init_ps.get<
bool>(
"print_verbose_info",
true);
395 bool verbose_clear_screen = init_ps.get<
bool>(
"clear_screen",
true);
// ANSI colour escape sequences used for the bar graph.
397 auto blue =
"\033[34m";
398 auto cyan =
"\033[36m";
399 auto green =
"\033[32m";
400 auto yellow =
"\033[93m";
401 auto red =
"\033[31m";
403 metricMan->initialize(metric_ps,
"RoutingReceiver");
404 metricMan->do_start();
// ESC[2J clears the terminal once before the first report.
405 if (print_verbose && verbose_clear_screen)
407 std::cout <<
"\033[2J";
// Destination rank -> number of sequence IDs routed to it.
410 std::map<int, int> receiver_table = std::map<int, int>();
// Reference point for the sleep_until at the end of the iteration.
414 auto start_time = std::chrono::steady_clock::now();
// Take everything received since the previous iteration.
416 auto this_table = rr.GetAndClearRoutingTable();
// Nothing is reported for an empty table; this also keeps the divisions by
// this_table.size() below away from zero.
418 if (!this_table.empty())
// Start with one graph column per entry and shrink by the divisor `n` until
// the width fits max_graph_width.
420 auto graph_width = this_table.size();
422 auto graph_width_orig = graph_width;
423 while (graph_width > max_graph_width)
426 graph_width = graph_width_orig / n;
// Count entries per destination rank.
429 for (
auto& entry : this_table)
431 receiver_table[entry.second]++;
434 auto average_entries_per_receiver = this_table.size() / receiver_table.size();
// Colour thresholds, in graph columns, around the per-receiver average.
// NOTE(review): average_entries_per_receiver is unsigned (size_t / size_t);
// if `offset` is unsigned and exceeds the average, the subtractions wrap to
// very large values -- confirm how `offset` is defined.
437 auto cyan_threshold = ((average_entries_per_receiver - offset) / 2) / n;
438 auto green_threshold = (average_entries_per_receiver - offset) / n;
439 auto yellow_threshold = (average_entries_per_receiver + offset) / n;
440 auto red_threshold = (2 * average_entries_per_receiver) / n;
442 TLOG(TLVL_TRACE) <<
"CT: " << cyan_threshold <<
", GT: " << green_threshold <<
", YT: " << yellow_threshold <<
", RT: " << red_threshold;
// `report` is the one-line summary; `verbose_report` holds one graph row per
// receiver.
444 std::ostringstream report;
445 std::ostringstream verbose_report;
// ESC[;H moves the cursor home and ESC[J clears from there to the end of the
// screen, so each report overwrites the previous one.
447 if (print_verbose && verbose_clear_screen)
449 std::cout <<
"\033[;H\033[J";
452 report << artdaq::TimeUtils::gettimeofday_us() <<
": " << this_table.size() <<
" Entries, ";
453 for (
auto& receiver : receiver_table)
// Integer percentage of this table's entries that went to this receiver.
455 auto percent =
static_cast<int>(receiver.second * 100 / this_table.size());
456 report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%), ";
459 verbose_report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%)\t[";
// Bar length in columns, scaled by the same divisor as the graph width.
461 size_t graph_characters = receiver.second / n;
// Each column takes the colour of the threshold band its index falls in.
463 for (
size_t ii = 0; ii < graph_characters; ++ii)
465 if (ii < cyan_threshold)
467 verbose_report << blue;
469 else if (ii < green_threshold)
471 verbose_report << cyan;
473 else if (ii < yellow_threshold)
475 verbose_report << green;
477 else if (ii < red_threshold)
479 verbose_report << yellow;
483 verbose_report << red;
485 verbose_report <<
"|";
// Pad the row to the full graph width.
// NOTE(review): graph_width - graph_characters is unsigned; it relies on
// graph_characters never exceeding graph_width.
487 std::string spaces = std::string(graph_width - graph_characters,
' ');
// ESC[0m resets the colour before the padding and the closing bracket.
488 verbose_report <<
"\033[0m" << spaces <<
"]" << std::endl;
// The summary goes to both the TRACE log and stdout.
492 TLOG(TLVL_INFO) << report.str();
493 std::cout << report.str() << std::endl;
496 std::cout << verbose_report.str() << std::endl;
// Sleep out the remainder of the collection interval measured from
// start_time.
499 std::this_thread::sleep_until(start_time + std::chrono::milliseconds(collection_time_ms));
502 metricMan->do_stop();
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
A row of the Routing Table.
~RoutingReceiver()
RoutingReceiver Destructor.
fhicl::Atom< bool > print_verbose_info
"print_verbose_info" (Default: true): Print verbose information about each receiver detected in routing tables
static void CleanUpGlobals()
Clean up statically-allocated Manager class instances.
fhicl::Atom< size_t > collection_time_ms
"collection_time_ms": Time to collect routing table updates between printing summaries ...
RoutingReceiver(fhicl::ParameterSet const &pset)
RoutingReceiver Constructor.
fhicl::TableFragment< artdaq::artdaqapp::Config > artdaqAppConfig
Configuration for artdaq Application (BoardReader, etc)
Accepted configuration parameters for RoutingReceiver.
std::map< Fragment::sequence_id_t, int > GetRoutingTable()
Get the current routing table.
fhicl::WrappedTable< Config > Parameters
Used for ParameterSet validation (if desired)
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network transmission.
Class which receives routing tables and prints updates.
hostMap_t GetHostMap()
Get the host map.
std::map< Fragment::sequence_id_t, int > GetAndClearRoutingTable()
Get the current routing table, additionally clearing all entries.
std::map< int, std::string > hostMap_t
The host_map is a map associating ranks (int) with host strings (std::string).
hostMap_t MakeHostMap(fhicl::ParameterSet const &pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
fhicl::Atom< size_t > graph_width
"graph_width": Width of the summary graph