1 #define TRACE_NAME "RoutingReceiver"
2 #include "artdaq/DAQdata/Globals.hh"
5 #include <netinet/in.h>
8 #include <sys/socket.h>
12 #include "artdaq/Application/LoadParameterSet.hh"
14 #include "artdaq/DAQrate/detail/RoutingPacket.hh"
15 #include "artdaq/TransferPlugins/detail/HostMap.hh"
16 #include "canvas/Utilities/Exception.h"
17 #include "fhiclcpp/types/Atom.h"
18 #include "fhiclcpp/types/OptionalTable.h"
19 #include "fhiclcpp/types/TableFragment.h"
20 #include "proto/artdaqapp.hh"
33 fhicl::Atom<size_t>
collection_time_ms{fhicl::Name{
"collection_time_ms"}, fhicl::Comment{
"Time to collect routing table updates between printing summaries"}, 1000};
35 fhicl::Atom<bool>
print_verbose_info{fhicl::Name{
"print_verbose_info"}, fhicl::Comment{
"Print verbose information about each receiver detected in routing tables"},
true};
37 fhicl::Atom<size_t>
graph_width{fhicl::Name{
"graph_width"}, fhicl::Comment{
"Width of the summary graph"}, 40};
50 , routing_table_last_(0)
52 TLOG(TLVL_DEBUG) <<
"Received pset: " << pset.to_string();
56 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_table_config", fhicl::ParameterSet());
57 use_routing_master_ = rmConfig.get<
bool>(
"use_routing_master",
false);
58 table_port_ = rmConfig.get<
int>(
"table_update_port", 35556);
59 table_address_ = rmConfig.get<std::string>(
"table_update_address",
"227.128.12.28");
63 if (use_routing_master_) startTableReceiverThread_();
71 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver BEGIN";
73 if (routing_thread_.joinable()) routing_thread_.join();
74 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver END.";
83 std::unique_lock<std::mutex> lk(routing_mutex_);
84 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
85 return routing_table_copy;
94 std::unique_lock<std::mutex> lk(routing_mutex_);
95 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
96 routing_table_.clear();
97 return routing_table_copy;
// Opens the UDP socket on which routing table updates are received and
// subscribes it to the configured multicast group.
// Each failure below is logged at TLVL_ERROR; what happens after the log call
// (exit/return) is on lines not shown in this listing.
107 void setupTableListener_()
// Plain IPv4 datagram socket; the descriptor is kept in table_socket_.
110 table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
111 if (table_socket_ < 0)
113 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving table updates!";
117 struct sockaddr_in si_me_request;
// Enable SO_REUSEADDR on the socket before binding.
120 if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
122 TLOG(TLVL_ERROR) <<
" Unable to enable port reuse on request socket";
// Build the local bind address: table_port_ plus table_address_ parsed as a
// dotted-quad string by inet_aton.
125 memset(&si_me_request, 0,
sizeof(si_me_request));
126 si_me_request.sin_family = AF_INET;
127 si_me_request.sin_port = htons(table_port_);
129 struct in_addr in_addr_s;
130 sts = inet_aton(table_address_.c_str(), &in_addr_s);
133 TLOG(TLVL_ERROR) <<
"inet_aton says table_address " << table_address_ <<
" is invalid";
135 si_me_request.sin_addr.s_addr = in_addr_s.s_addr;
136 if (bind(table_socket_, (
struct sockaddr*)&si_me_request,
sizeof(si_me_request)) == -1)
138 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << table_port_;
// Join the multicast group: the group address is table_address_ resolved by
// ResolveHost, and the local interface is left as INADDR_ANY.
143 sts =
ResolveHost(table_address_.c_str(), mreq.imr_multiaddr);
146 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast address for table updates";
149 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
150 if (setsockopt(table_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
152 TLOG(TLVL_ERROR) <<
"Unable to join multicast group";
// Starts the thread that runs receiveTableUpdatesLoop_ on this object.
156 void startTableReceiverThread_()
// Wait for any previously started thread to finish before replacing it.
158 if (routing_thread_.joinable()) routing_thread_.join();
159 TLOG(TLVL_INFO) <<
"Starting Routing Thread";
162 routing_thread_ = boost::thread(&RoutingReceiver::receiveTableUpdatesLoop_,
this);
// A boost::exception raised while starting the thread is reported both to the
// trace log and to std::cerr; any further handling is on lines not shown here.
164 catch (
const boost::exception& e)
166 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
167 std::cerr <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
// Body of the routing thread: polls the table socket, receives routing
// packets, validates them and merges their entries into routing_table_.
// NOTE(review): the loop construct and several conditions are on lines not
// shown in this listing; comments below describe only the visible statements.
171 void receiveTableUpdatesLoop_()
// Logged when the loop is about to stop (the guarding condition is not visible
// here; presumably a check of should_stop_).
177 TLOG(TLVL_DEBUG) << __func__ <<
": should_stop is " << std::boolalpha << should_stop_ <<
", stopping";
181 TLOG(TLVL_TRACE) << __func__ <<
": Polling table socket for new routes";
// Open the listener socket on demand while table_socket_ is still -1.
182 if (table_socket_ == -1)
184 TLOG(TLVL_DEBUG) << __func__ <<
": Opening table listener socket";
185 setupTableListener_();
// Still -1 after the setup attempt: the socket could not be opened.
187 if (table_socket_ == -1)
189 TLOG(TLVL_DEBUG) << __func__ <<
": The listen socket was not opened successfully.";
// Wait up to 1000 ms for readable (normal or priority) data on the socket.
194 fd.fd = table_socket_;
195 fd.events = POLLIN | POLLPRI;
197 auto res = poll(&fd, 1, 1000);
200 auto first = artdaq::Fragment::InvalidSequenceID;
201 auto last = artdaq::Fragment::InvalidSequenceID;
// Receive buffer sized to MAX_ROUTING_TABLE_SIZE bytes.
202 std::vector<uint8_t> buf(MAX_ROUTING_TABLE_SIZE);
205 TLOG(TLVL_DEBUG) << __func__ <<
": Going to receive RoutingPacketHeader";
206 struct sockaddr_in from;
207 socklen_t len =
sizeof(from);
// Read one datagram; `from` receives the sender address.
208 auto stss = recvfrom(table_socket_, &buf[0], MAX_ROUTING_TABLE_SIZE, 0, (
struct sockaddr*)&from, &len);
// NOTE(review): from.sin_port is in network byte order and is logged without
// ntohs(), so the printed port may be byte-swapped on little-endian hosts.
209 TLOG(TLVL_DEBUG) << __func__ <<
": Received " << stss <<
" bytes from " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port;
// Size check of the received byte count against sizeof(hdr); the branch bodies
// are not visible, the message below is logged for the rejected case.
211 if (stss > static_cast<ssize_t>(
sizeof(hdr)))
217 TLOG(TLVL_TRACE) << __func__ <<
": Incorrect size received. Discarding.";
221 TRACE(TLVL_DEBUG,
"receiveTableUpdatesLoop_: Checking for valid header with nEntries=%lu headerData:0x%016lx%016lx", hdr.
nEntries, ((
unsigned long*)&hdr)[0], ((
unsigned long*)&hdr)[1]);
// Packets whose header field does not equal ROUTING_MAGIC are reported as
// non-RoutingPacket data.
222 if (hdr.
header != ROUTING_MAGIC)
224 TLOG(TLVL_TRACE) << __func__ <<
": non-RoutingPacket received. No ROUTING_MAGIC. size(bytes)=" << stss;
231 TRACE(6,
"receiveTableUpdatesLoop_: Received a packet of %ld bytes. 1st 16 bytes: 0x%016lx%016lx", stss, ((
unsigned long*)&buffer[0])[0], ((
unsigned long*)&buffer[0])[1]);
// Sequence IDs of the first and last entries in the decoded packet.
233 first = buffer[0].sequence_id;
234 last = buffer[buffer.size() - 1].sequence_id;
// The packet must span exactly hdr.nEntries consecutive sequence IDs.
236 if (first + hdr.
nEntries - 1 != last)
238 TLOG(TLVL_ERROR) << __func__ <<
": Skipping this RoutingPacket because the first (" << first <<
") and last (" << last <<
") entries are inconsistent (sz=" << hdr.
nEntries <<
")!";
// Expected sequence ID of the next entry (its update is on a line not shown).
241 auto thisSeqID = first;
// routing_table_ is modified under routing_mutex_.
244 std::unique_lock<std::mutex> lck(routing_mutex_);
// Only process the packet if its last sequence ID is not already in the table.
245 if (routing_table_.count(last) == 0)
247 for (
auto entry : buffer)
249 if (thisSeqID != entry.sequence_id)
251 TLOG(TLVL_ERROR) << __func__ <<
": Aborting processing of this RoutingPacket because I encountered an inconsistent entry (seqid=" << entry.sequence_id <<
", expected=" << thisSeqID <<
")!";
// Remember the last sequence ID that was consistent.
252 last = thisSeqID - 1;
// An entry already in the table with a different destination rank is reported
// as corruption. NOTE(review): "Recevied" in the message below is a typo in a
// runtime string and is left untouched here.
256 if (routing_table_.count(entry.sequence_id))
258 if (routing_table_[entry.sequence_id] != entry.destination_rank)
260 TLOG(TLVL_ERROR) << __func__ <<
": Detected routing table corruption! Recevied update specifying that sequence ID " << entry.sequence_id
261 <<
" should go to rank " << entry.destination_rank <<
", but I had already been told to send it to " << routing_table_[entry.sequence_id] <<
"!"
262 <<
" I will use the original value!";
// Ignore entries older than the newest sequence ID recorded so far.
266 if (entry.sequence_id < routing_table_last_)
continue;
267 routing_table_[entry.sequence_id] = entry.destination_rank;
268 TLOG(TLVL_DEBUG) << __func__ <<
": (my_rank=" << my_rank <<
") received update: SeqID " << entry.sequence_id
269 <<
" -> Rank " << entry.destination_rank;
273 TLOG(TLVL_DEBUG) << __func__ <<
": There are now " << routing_table_.size() <<
" entries in the Routing Table";
274 if (routing_table_.size() > 0) TLOG(TLVL_DEBUG) << __func__ <<
": Last routing table entry is seqID=" << routing_table_.rbegin()->first;
// Dump every table entry at trace level 45.
277 for (
auto& entry : routing_table_)
279 TLOG(45) <<
"Routing Table Entry" << counter <<
": " << entry.first <<
" -> " << entry.second;
// Advance the high-water mark of sequence IDs seen so far.
284 if (last > routing_table_last_) routing_table_last_ = last;
291 bool use_routing_master_;
292 std::atomic<bool> should_stop_;
294 std::string table_address_;
296 std::map<Fragment::sequence_id_t, int> routing_table_;
297 Fragment::sequence_id_t routing_table_last_;
298 mutable std::mutex routing_mutex_;
299 boost::thread routing_thread_;
304 static bool sighandler_init =
false;
305 static bool should_stop =
false;
// Handler installed by main() for SIGINT, SIGTERM, SIGUSR1 and SIGUSR2.
// It logs the signal number; the statement that requests the stop is on a line
// not shown in this listing (presumably it sets should_stop — TODO confirm).
306 static void signal_handler(
int signum)
309 TRACE_STREAMER(TLVL_ERROR, &(
"routingReceiver")[0], 0, 0, 0) <<
"A signal of type " << signum <<
" was caught by routingReceiver. Stopping receive loop!";
// With a null new-set argument this call only fetches the current signal mask
// into `set`; the second call then unblocks every signal in that mask.
314 pthread_sigmask(SIG_UNBLOCK, NULL, &set);
315 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
// Entry point of the routingReceiver tool: installs signal handlers, loads the
// configuration, then repeatedly drains the routing table and prints per-receiver
// usage statistics (a one-line summary plus an optional coloured bar graph).
// NOTE(review): loop constructs and several declarations (rr, n, offset) are on
// lines not shown in this listing.
318 int main(
int argc,
char* argv[])
320 artdaq::configureMessageFacility(
"RoutingReceiver",
false,
false);
// Handler installation is guarded by a function-local static mutex and the
// sighandler_init flag so that it happens only once.
321 static std::mutex sighandler_mutex;
322 std::unique_lock<std::mutex> lk(sighandler_mutex);
324 if (!sighandler_init)
326 sighandler_init =
true;
327 std::vector<int> signals = {SIGINT, SIGTERM, SIGUSR1, SIGUSR2};
328 for (
auto signal : signals)
// Query the current disposition first; signals set to SIG_IGN are left alone.
330 struct sigaction old_action;
331 sigaction(signal, NULL, &old_action);
335 if (old_action.sa_handler != SIG_IGN)
337 struct sigaction action;
338 action.sa_handler = signal_handler;
// Block all four handled signals while the handler runs.
339 sigemptyset(&action.sa_mask);
340 for (
auto sigblk : signals)
342 sigaddset(&action.sa_mask, sigblk);
347 sigaction(signal, &action, NULL);
352 fhicl::ParameterSet init_ps = LoadParameterSet<artdaq::RoutingReceiver::Config>(argc, argv,
"routingReceiver",
"This application receives Routing Tables, and calculates statistics about the usage of the receivers");
// Each nested table falls back to its parent ParameterSet when the key is absent.
353 fhicl::ParameterSet config_ps = init_ps.get<fhicl::ParameterSet>(
"daq", init_ps);
354 fhicl::ParameterSet metric_ps = config_ps.get<fhicl::ParameterSet>(
"metrics", config_ps);
355 fhicl::ParameterSet fr_ps = config_ps.get<fhicl::ParameterSet>(
"fragment_receiver", config_ps);
359 auto host_map = rr.GetHostMap();
// Display options, read from the top-level ParameterSet.
361 size_t collection_time_ms = init_ps.get<
size_t>(
"collection_time_ms", 1000);
// NOTE(review): this reads "max_graph_width" (default 100), whereas the Config
// atoms in this file declare "graph_width" (default 40) — confirm which is intended.
362 size_t max_graph_width = init_ps.get<
size_t>(
"max_graph_width", 100);
363 bool print_verbose = init_ps.get<
bool>(
"print_verbose_info",
true);
364 bool verbose_clear_screen = init_ps.get<
bool>(
"clear_screen",
true);
// ANSI escape sequences used to colour the bar graph.
366 auto blue =
"\033[34m";
367 auto cyan =
"\033[36m";
368 auto green =
"\033[32m";
369 auto yellow =
"\033[93m";
370 auto red =
"\033[31m";
372 metricMan->initialize(metric_ps,
"RoutingReceiver");
373 metricMan->do_start();
// Clear the terminal once before the first verbose report.
374 if (print_verbose && verbose_clear_screen) std::cout <<
"\033[2J";
// Number of routing table entries seen per destination rank.
376 std::map<int, int> receiver_table = std::map<int, int>();
380 auto start_time = std::chrono::steady_clock::now();
// Take (and clear) everything the receiver has accumulated so far.
382 auto this_table = rr.GetAndClearRoutingTable();
384 if (this_table.size() > 0)
// The graph starts one column per entry and is reduced via n until it fits
// within max_graph_width (the update of n is on a line not shown).
386 auto graph_width = this_table.size();
388 auto graph_width_orig = graph_width;
389 while (graph_width > max_graph_width)
392 graph_width = graph_width_orig / n;
395 for (
auto& entry : this_table)
397 receiver_table[entry.second]++;
400 auto average_entries_per_receiver = this_table.size() / receiver_table.size();
// Colour band boundaries, expressed in graph characters (entries divided by n).
403 auto cyan_threshold = ((average_entries_per_receiver - offset) / 2) / n;
404 auto green_threshold = (average_entries_per_receiver - offset) / n;
405 auto yellow_threshold = (average_entries_per_receiver + offset) / n;
406 auto red_threshold = (2 * average_entries_per_receiver) / n;
408 TLOG(TLVL_TRACE) <<
"CT: " << cyan_threshold <<
", GT: " << green_threshold <<
", YT: " << yellow_threshold <<
", RT: " << red_threshold;
410 std::ostringstream report;
411 std::ostringstream verbose_report;
// Move the cursor home and clear the screen before redrawing the verbose view.
413 if (print_verbose && verbose_clear_screen) std::cout <<
"\033[;H\033[J";
415 report << artdaq::TimeUtils::gettimeofday_us() <<
": " << this_table.size() <<
" Entries, ";
416 for (
auto& receiver : receiver_table)
// Integer percentage of this table's entries routed to the receiver.
418 auto percent =
static_cast<int>(receiver.second * 100 / this_table.size());
419 report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%), ";
422 verbose_report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%)\t[";
424 size_t graph_characters = receiver.second / n;
// One '|' per graph character, coloured by the band its position falls in.
426 for (
size_t ii = 0; ii < graph_characters; ++ii)
428 if (ii < cyan_threshold)
430 verbose_report << blue;
432 else if (ii < green_threshold)
434 verbose_report << cyan;
436 else if (ii < yellow_threshold)
438 verbose_report << green;
440 else if (ii < red_threshold)
442 verbose_report << yellow;
446 verbose_report << red;
448 verbose_report <<
"|";
// Pad the bar to graph_width. NOTE(review): this is an unsigned subtraction and
// wraps if graph_characters > graph_width — confirm that cannot happen.
450 std::string spaces = std::string(graph_width - graph_characters,
' ');
451 verbose_report <<
"\033[0m" << spaces <<
"]" << std::endl;
455 TLOG(TLVL_INFO) << report.str();
456 std::cout << report.str() << std::endl;
457 if (print_verbose) std::cout << verbose_report.str() << std::endl;
// Sleep until collection_time_ms after start_time.
459 std::this_thread::sleep_until(start_time + std::chrono::milliseconds(collection_time_ms));
462 metricMan->do_stop();
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
A row of the Routing Table.
~RoutingReceiver()
RoutingReceiver Destructor.
hostMap_t MakeHostMap(fhicl::ParameterSet pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
fhicl::Atom< bool > print_verbose_info
"print_verbose_info" (Default: true): Print verbose information about each receiver detected in routing tables
static void CleanUpGlobals()
Clean up statically-allocated Manager class instances.
fhicl::Atom< size_t > collection_time_ms
"collection_time_ms": Time to collect routing table updates between printing summaries ...
RoutingReceiver(fhicl::ParameterSet const &pset)
RoutingReceiver Constructor.
fhicl::TableFragment< artdaq::artdaqapp::Config > artdaqAppConfig
Configuration for artdaq Application (BoardReader, etc)
Accepted configuration parameters for RoutingReceiver.
std::map< Fragment::sequence_id_t, int > GetRoutingTable()
Get the current routing table.
fhicl::WrappedTable< Config > Parameters
Used for ParameterSet validation (if desired)
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
Class which receives routing tables and prints updates.
hostMap_t GetHostMap()
Get the host map.
std::map< Fragment::sequence_id_t, int > GetAndClearRoutingTable()
Get the current routing table, additionally clearing all entries.
std::map< int, std::string > hostMap_t
The host_map is a map associating ranks with artdaq::DestinationInfo objects.
fhicl::Atom< size_t > graph_width
"graph_width": Width of the summary graph