1 #define TRACE_NAME "RoutingReceiver"
2 #include "artdaq/DAQdata/Globals.hh"
5 #include "canvas/Utilities/Exception.h"
7 #include <netinet/in.h>
12 #include <sys/socket.h>
14 #include "artdaq/Application/LoadParameterSet.hh"
15 #include "artdaq/DAQrate/detail/RoutingPacket.hh"
16 #include "artdaq/TransferPlugins/detail/HostMap.hh"
17 #include "proto/artdaqapp.hh"
18 #include "fhiclcpp/types/Atom.h"
19 #include "fhiclcpp/types/OptionalTable.h"
20 #include "fhiclcpp/types/TableFragment.h"
/// "collection_time_ms" (Default: 1000): Time to collect routing table updates between printing summaries
30 fhicl::Atom<size_t>
collection_time_ms{ fhicl::Name{
"collection_time_ms" }, fhicl::Comment{
"Time to collect routing table updates between printing summaries" }, 1000 };
/// "print_verbose_info" (Default: true): Print verbose information about each receiver detected in routing tables
32 fhicl::Atom<bool>
print_verbose_info{ fhicl::Name{
"print_verbose_info" }, fhicl::Comment{
"Print verbose information about each receiver detected in routing tables" },
true };
/// "graph_width" (Default: 40): Width of the summary graph
/// NOTE(review): main() in this file reads the key "max_graph_width" (default 100) and also "clear_screen",
/// neither of which is declared here -- confirm which key names are intended.
34 fhicl::Atom<size_t>
graph_width{ fhicl::Name{
"graph_width" }, fhicl::Comment{
"Width of the summary graph" }, 40 };
/// Config wrapped in a fhicl::WrappedTable (presumably used for ParameterSet validation -- confirm).
37 using Parameters = fhicl::WrappedTable<Config>;
// Tail of the RoutingReceiver constructor; its signature and earlier member initializers are not visible in this listing.
// No routing packet has been processed yet, so the "last seen" sequence ID starts at 0.
42 , routing_table_last_(0)
// Log the complete parameter set that was handed to the constructor.
44 TLOG(TLVL_DEBUG) <<
"Received pset: " << pset.to_string();
// Routing settings live in the "routing_table_config" table. When it is absent an empty
// ParameterSet is used, so every get() below falls back to its default value.
48 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_table_config", fhicl::ParameterSet());
// Table reception is disabled unless "use_routing_master" is set.
49 use_routing_master_ = rmConfig.get<
bool>(
"use_routing_master",
false);
// Port used when binding the table update socket (default 35556).
50 table_port_ = rmConfig.get<
int>(
"table_update_port", 35556);
// Address used for the bind and the multicast group join (default 227.128.12.28).
51 table_address_ = rmConfig.get<std::string>(
"table_update_address",
"227.128.12.28");
// Only start the receiver thread when routing tables are expected.
55 if (use_routing_master_) startTableReceiverThread_();
// Shutdown sequence (presumably the destructor body; the signature is not visible in this listing).
60 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver BEGIN";
// Wait for the routing thread to finish, if one was started.
62 if (routing_thread_.joinable()) routing_thread_.join();
63 TLOG(TLVL_DEBUG) <<
"Shutting down RoutingReceiver END.";
66 std::map<Fragment::sequence_id_t, int> GetRoutingTable()
68 std::unique_lock<std::mutex> lk(routing_mutex_);
69 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
70 return routing_table_copy;
73 std::map<Fragment::sequence_id_t, int> GetAndClearRoutingTable()
75 std::unique_lock<std::mutex> lk(routing_mutex_);
76 std::map<Fragment::sequence_id_t, int> routing_table_copy(routing_table_);
77 routing_table_.clear();
78 return routing_table_copy;
81 hostMap_t GetHostMap() {
return host_map_; }
/// @brief Create the UDP socket for routing table updates, bind it, and join the multicast group.
/// Every failure is reported through TLOG(TLVL_ERROR); what happens after each report is on lines
/// that are not visible in this listing.
84 void setupTableListener_()
// IPv4 datagram (UDP) socket; the descriptor is kept in table_socket_.
87 table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
88 if (table_socket_ < 0)
90 TLOG(TLVL_ERROR) <<
"Error creating socket for receiving table updates!";
94 struct sockaddr_in si_me_request;
// Enable SO_REUSEADDR on the socket (`yes` is declared on a line not shown here).
97 if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
99 TLOG(TLVL_ERROR) <<
" Unable to enable port reuse on request socket";
// Build the local address to bind to: IPv4, configured port in network byte order.
102 memset(&si_me_request, 0,
sizeof(si_me_request));
103 si_me_request.sin_family = AF_INET;
104 si_me_request.sin_port = htons(table_port_);
// Parse table_address_ as a dotted-quad address; the check of `sts` is on a line not shown here.
106 struct in_addr in_addr_s;
107 sts = inet_aton(table_address_.c_str(), &in_addr_s);
110 TLOG(TLVL_ERROR) <<
"inet_aton says table_address " << table_address_ <<
" is invalid";
// The socket is bound to the table address itself, not to INADDR_ANY.
112 si_me_request.sin_addr.s_addr = in_addr_s.s_addr;
113 if (bind(table_socket_, (
struct sockaddr *)&si_me_request,
sizeof(si_me_request)) == -1)
115 TLOG(TLVL_ERROR) <<
"Cannot bind request socket to port " << table_port_;
// Resolve the same address again as the multicast group to join (`mreq` is declared on a line not shown here).
120 sts =
ResolveHost(table_address_.c_str(), mreq.imr_multiaddr);
123 TLOG(TLVL_ERROR) <<
"Unable to resolve multicast address for table updates";
// Join the group with INADDR_ANY as the interface address.
126 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
127 if (setsockopt(table_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
sizeof(mreq)) < 0)
129 TLOG(TLVL_ERROR) <<
"Unable to join multicast group";
/// @brief (Re)start the thread that runs receiveTableUpdatesLoop_.
133 void startTableReceiverThread_()
// If a previous thread is still joinable, wait for it before replacing the thread object.
135 if (routing_thread_.joinable()) routing_thread_.join();
136 TLOG(TLVL_INFO) <<
"Starting Routing Thread";
// Run receiveTableUpdatesLoop_ on this object in a new boost::thread.
138 routing_thread_ = boost::thread(&RoutingReceiver::receiveTableUpdatesLoop_,
this);
// A boost::exception raised while starting the thread is reported to the trace log and to
// stderr, together with errno. Any further handling is on lines not shown here.
140 catch (
const boost::exception& e)
142 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
143 std::cerr <<
"Caught boost::exception starting Routing Table Receive thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
/// @brief Thread body: receive routing packets from the table socket and merge them into routing_table_.
/// The loop construct, several declarations (fd, hdr, buffer, counter) and the statements that follow
/// each error message are on lines that are not visible in this listing.
147 void receiveTableUpdatesLoop_()
// Logged when the stop flag is observed.
153 TLOG(TLVL_DEBUG) << __func__ <<
": should_stop is " << std::boolalpha << should_stop_ <<
", stopping";
157 TLOG(TLVL_TRACE) << __func__ <<
": Polling table socket for new routes";
// The listener socket is opened lazily: -1 means it has not been set up (or setup failed).
158 if (table_socket_ == -1)
160 TLOG(TLVL_DEBUG) << __func__ <<
": Opening table listener socket";
161 setupTableListener_();
// Still -1 after the setup attempt.
163 if (table_socket_ == -1)
165 TLOG(TLVL_DEBUG) << __func__ <<
": The listen socket was not opened successfully.";
// Wait up to 1000 ms for readable (normal or priority) data on the table socket.
170 fd.fd = table_socket_;
171 fd.events = POLLIN | POLLPRI;
173 auto res = poll(&fd, 1, 1000);
// first/last will hold the sequence ID range carried by the received packet.
176 auto first = artdaq::Fragment::InvalidSequenceID;
177 auto last = artdaq::Fragment::InvalidSequenceID;
// Receive buffer sized for the largest routing table datagram.
178 std::vector<uint8_t> buf(MAX_ROUTING_TABLE_SIZE);
181 TLOG(TLVL_DEBUG) << __func__ <<
": Going to receive RoutingPacketHeader";
182 struct sockaddr_in from;
183 socklen_t len =
sizeof(from);
// Read one datagram (header plus entries) and record the sender address.
184 auto stss = recvfrom(table_socket_, &buf[0], MAX_ROUTING_TABLE_SIZE, 0, (
struct sockaddr*)&from, &len);
// NOTE(review): from.sin_port is streamed without ntohs(), so the logged port is in network byte order -- confirm.
185 TLOG(TLVL_DEBUG) << __func__ <<
": Received " << stss <<
" bytes from " << inet_ntoa(from.sin_addr) <<
":" << from.sin_port;
// A usable packet must be larger than the header alone.
187 if (stss > static_cast<ssize_t>(
sizeof(hdr)))
193 TLOG(TLVL_TRACE) << __func__ <<
": Incorrect size received. Discarding.";
// Dump nEntries and the first 16 bytes of the header for debugging.
197 TRACE(TLVL_DEBUG,
"receiveTableUpdatesLoop_: Checking for valid header with nEntries=%lu headerData:0x%016lx%016lx", hdr.
nEntries, ((
unsigned long*)&hdr)[0], ((
unsigned long*)&hdr)[1]);
// Packets whose header does not carry ROUTING_MAGIC are not routing packets.
198 if (hdr.
header != ROUTING_MAGIC)
200 TLOG(TLVL_TRACE) << __func__ <<
": non-RoutingPacket received. No ROUTING_MAGIC. size(bytes)=" << stss;
207 TRACE(6,
"receiveTableUpdatesLoop_: Received a packet of %ld bytes. 1st 16 bytes: 0x%016lx%016lx", stss, ((
unsigned long*)&buffer[0])[0], ((
unsigned long*)&buffer[0])[1]);
// NOTE(review): indexing buffer[0] and buffer[buffer.size() - 1] assumes at least one entry;
// no emptiness check is visible in this listing -- confirm.
209 first = buffer[0].sequence_id;
210 last = buffer[buffer.size() - 1].sequence_id;
// The entries are expected to cover a contiguous range of nEntries sequence IDs.
212 if (first + hdr.
nEntries - 1 != last)
214 TLOG(TLVL_ERROR) << __func__ <<
": Skipping this RoutingPacket because the first (" << first <<
") and last (" << last <<
") entries are inconsistent (sz=" << hdr.
nEntries <<
")!";
// Sequence ID expected for the next entry while walking the packet (advanced on a line not shown here).
217 auto thisSeqID = first;
// The routing table is only touched while routing_mutex_ is held.
220 std::unique_lock<std::mutex> lck(routing_mutex_);
// A packet whose last sequence ID is already in the table is not processed again.
221 if (routing_table_.count(last) == 0)
223 for (
auto entry : buffer)
225 if (thisSeqID != entry.sequence_id)
227 TLOG(TLVL_ERROR) << __func__ <<
": Aborting processing of this RoutingPacket because I encountered an inconsistent entry (seqid=" << entry.sequence_id <<
", expected=" << thisSeqID <<
")!";
// Remember the last sequence ID that was still consistent.
228 last = thisSeqID - 1;
// Existing entry for this sequence ID: report a conflicting destination (the log text says the original value is kept).
232 if (routing_table_.count(entry.sequence_id))
234 if (routing_table_[entry.sequence_id] != entry.destination_rank)
236 TLOG(TLVL_ERROR) << __func__ <<
": Detected routing table corruption! Recevied update specifying that sequence ID " << entry.sequence_id
237 <<
" should go to rank " << entry.destination_rank <<
", but I had already been told to send it to " << routing_table_[entry.sequence_id] <<
"!"
238 <<
" I will use the original value!";
// Entries older than the last processed sequence ID are skipped.
242 if (entry.sequence_id < routing_table_last_)
continue;
243 routing_table_[entry.sequence_id] = entry.destination_rank;
244 TLOG(TLVL_DEBUG) << __func__ <<
": (my_rank=" << my_rank <<
") received update: SeqID " << entry.sequence_id
245 <<
" -> Rank " << entry.destination_rank;
249 TLOG(TLVL_DEBUG) << __func__ <<
": There are now " << routing_table_.size() <<
" entries in the Routing Table";
250 if (routing_table_.size() > 0) TLOG(TLVL_DEBUG) << __func__ <<
": Last routing table entry is seqID=" << routing_table_.rbegin()->first;
// Dump the whole table at trace level 45.
253 for (
auto& entry : routing_table_)
255 TLOG(45) <<
"Routing Table Entry" << counter <<
": " << entry.first <<
" -> " << entry.second;
// routing_table_last_ only ever moves forward.
260 if (last > routing_table_last_) routing_table_last_ = last;
// Value of "use_routing_master" from routing_table_config; gates starting the receiver thread.
267 bool use_routing_master_;
// Stop flag reported by receiveTableUpdatesLoop_ when it stops.
268 std::atomic<bool> should_stop_;
// Value of "table_update_address"; used for the bind and the multicast join in setupTableListener_.
270 std::string table_address_;
// Sequence ID -> destination rank, filled by receiveTableUpdatesLoop_.
272 std::map<Fragment::sequence_id_t, int> routing_table_;
// Highest "last" sequence ID taken from a processed packet; entries below it are skipped.
273 Fragment::sequence_id_t routing_table_last_;
// Locked by the table getters and by the receive loop around routing_table_ access.
274 mutable std::mutex routing_mutex_;
// Thread running receiveTableUpdatesLoop_.
275 boost::thread routing_thread_;
// Set to true by main() once the signal handlers have been installed.
281 static bool sighandler_init =
false;
// Presumably set by signal_handler and polled by the main loop -- the writes and reads are not visible in this listing.
282 static bool should_stop =
false;
/// @brief Signal handler installed by main() for SIGINT, SIGTERM, SIGUSR1 and SIGUSR2.
/// @param signum Number of the signal that was caught
283 static void signal_handler(
int signum)
// Report which signal arrived.
// NOTE(review): streaming a log message from inside a signal handler is generally not async-signal-safe -- confirm this is acceptable.
286 TRACE_STREAMER(TLVL_ERROR, &(
"routingReceiver")[0], 0, 0, 0) <<
"A signal of type " << signum <<
" was caught by routingReceiver. Stopping receive loop!";
// With a NULL new-set argument, pthread_sigmask only stores the thread's current mask in `set`
// (`set` is declared on a line not shown here) ...
291 pthread_sigmask(SIG_UNBLOCK, NULL, &set);
// ... and this call then unblocks every signal contained in that mask.
292 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
/// @brief Entry point: install signal handlers, load the configuration, then repeatedly fetch the
/// routing table from a RoutingReceiver and print how its entries are spread over the receivers.
/// The loop header, the RoutingReceiver instance `rr`, and the variables `n` and `offset` are
/// declared on lines that are not visible in this listing.
/// @param argc Number of command-line arguments
/// @param argv Command-line arguments, forwarded to LoadParameterSet
296 int main(
int argc,
char* argv[])
298 artdaq::configureMessageFacility(
"RoutingReceiver",
false,
false);
// Signal handler installation is done under a mutex and only once (guarded by sighandler_init).
299 static std::mutex sighandler_mutex;
300 std::unique_lock<std::mutex> lk(sighandler_mutex);
302 if (!sighandler_init)
304 sighandler_init =
true;
305 std::vector<int> signals = { SIGINT, SIGTERM, SIGUSR1, SIGUSR2 };
306 for (
auto signal : signals)
// Query the current disposition first ...
308 struct sigaction old_action;
309 sigaction(signal, NULL, &old_action);
// ... and leave signals that are currently ignored alone.
313 if (old_action.sa_handler != SIG_IGN)
315 struct sigaction action;
316 action.sa_handler = signal_handler;
// While the handler runs, all four handled signals are blocked.
317 sigemptyset(&action.sa_mask);
318 for (
auto sigblk : signals)
320 sigaddset(&action.sa_mask, sigblk);
325 sigaction(signal, &action, NULL);
// Load the configuration. Each nested lookup falls back to the enclosing parameter set when the key is absent.
330 fhicl::ParameterSet init_ps = LoadParameterSet<artdaq::RoutingReceiver::Config>(argc, argv,
"routingReceiver",
"This application receives Routing Tables, and calculates statistics about the usage of the receivers");
331 fhicl::ParameterSet config_ps = init_ps.get<fhicl::ParameterSet>(
"daq", init_ps);
332 fhicl::ParameterSet metric_ps = config_ps.get<fhicl::ParameterSet>(
"metrics", config_ps);
333 fhicl::ParameterSet fr_ps = config_ps.get<fhicl::ParameterSet>(
"fragment_receiver", config_ps);
337 auto host_map = rr.GetHostMap();
// Display settings, read from the top-level parameter set.
339 size_t collection_time_ms = init_ps.get<
size_t>(
"collection_time_ms", 1000);
// NOTE(review): Config declares "graph_width" (default 40), but the key read here is "max_graph_width" (default 100) -- confirm.
340 size_t max_graph_width = init_ps.get<
size_t>(
"max_graph_width", 100);
341 bool print_verbose = init_ps.get<
bool>(
"print_verbose_info",
true);
342 bool verbose_clear_screen = init_ps.get<
bool>(
"clear_screen",
true);
// ANSI escape sequences used to colour the bar graph.
344 auto blue =
"\033[34m";
345 auto cyan =
"\033[36m";
346 auto green =
"\033[32m";
347 auto yellow =
"\033[93m";
348 auto red =
"\033[31m";
350 metricMan->initialize(metric_ps,
"RoutingReceiver");
351 metricMan->do_start();
// Clear the whole terminal once before the first report.
352 if (print_verbose && verbose_clear_screen) std::cout <<
"\033[2J";
// Number of routing table entries seen per destination rank.
354 std::map<int, int> receiver_table = std::map<int,int>();
// Start of the current collection period; the sleep_until at the end of the period is relative to this.
358 auto start_time = std::chrono::steady_clock::now();
// Take everything the receiver has collected so far and reset its table.
360 auto this_table = rr.GetAndClearRoutingTable();
362 if (this_table.size() > 0)
// Start with one graph column per table entry, then shrink by a growing divisor `n` until it fits max_graph_width.
364 auto graph_width = this_table.size();
366 auto graph_width_orig = graph_width;
367 while (graph_width > max_graph_width)
370 graph_width = graph_width_orig / n;
// Count how many entries were routed to each rank.
373 for (
auto& entry : this_table)
375 receiver_table[entry.second]++;
// this_table is non-empty here, so receiver_table has at least one key and the division is safe.
378 auto average_entries_per_receiver = this_table.size() / receiver_table.size();
// Colour thresholds, in graph characters (scaled by `n`), placed around the average.
381 auto cyan_threshold = ((average_entries_per_receiver - offset) / 2) / n;
382 auto green_threshold = (average_entries_per_receiver - offset) / n;
383 auto yellow_threshold = (average_entries_per_receiver + offset) / n;
384 auto red_threshold = (2 * average_entries_per_receiver) / n;
386 TLOG(TLVL_TRACE) <<
"CT: " << cyan_threshold <<
", GT: " << green_threshold <<
", YT: " << yellow_threshold <<
", RT: " << red_threshold;
// `report` is the one-line summary; `verbose_report` holds one bar-graph line per receiver.
388 std::ostringstream report;
389 std::ostringstream verbose_report;
// Move the cursor to the top-left corner and clear from there to the end of the screen.
391 if (print_verbose && verbose_clear_screen) std::cout <<
"\033[;H\033[J";
393 report << artdaq::TimeUtils::gettimeofday_us() <<
": " << this_table.size() <<
" Entries, ";
394 for (
auto& receiver : receiver_table)
// Integer percentage of this period's entries (truncated).
396 auto percent =
static_cast<int>(receiver.second * 100 / this_table.size());
397 report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%), ";
400 verbose_report << receiver.first <<
": " << receiver.second <<
" (" << percent <<
"%)\t[";
// One bar character per `n` entries.
402 size_t graph_characters = receiver.second / n;
// Colour each bar character by its position relative to the thresholds:
// blue, cyan, green, yellow, then red (presumably the else branch, which is not visible here).
404 for (
size_t ii = 0; ii < graph_characters; ++ii)
406 if (ii < cyan_threshold)
408 verbose_report << blue;
410 else if (ii < green_threshold)
412 verbose_report << cyan;
414 else if (ii < yellow_threshold)
416 verbose_report << green;
418 else if (ii < red_threshold)
420 verbose_report << yellow;
424 verbose_report << red;
426 verbose_report <<
"|";
// Pad the bar to the full graph width.
// NOTE(review): the subtraction is unsigned and would wrap if graph_characters exceeded graph_width -- confirm this cannot happen.
428 std::string spaces = std::string(graph_width - graph_characters,
' ');
// Reset the colour, pad, and close the bar.
429 verbose_report <<
"\033[0m" << spaces <<
"]" << std::endl;
// The summary goes to both the trace log and stdout; the bar graph only when verbose output is on.
433 TLOG(TLVL_INFO) << report.str();
434 std::cout << report.str() << std::endl;
435 if(print_verbose) std::cout << verbose_report.str() << std::endl;
// Sleep until collection_time_ms after start_time.
437 std::this_thread::sleep_until(start_time + std::chrono::milliseconds(collection_time_ms));
// Shutdown: stop metric reporting and release artdaq globals.
440 metricMan->do_stop();
441 artdaq::Globals::CleanUpGlobals();
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
A row of the Routing Table.
hostMap_t MakeHostMap(fhicl::ParameterSet pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
fhicl::Atom< bool > print_verbose_info
"print_verbose_info" (Default: true): Print verbose information about each receiver detected in routing tables
fhicl::Atom< size_t > collection_time_ms
"collection_time_ms" (Default: 1000): Time to collect routing table updates between printing summaries
fhicl::TableFragment< artdaq::artdaqapp::Config > artdaqAppConfig
Configuration for artdaq Application (BoardReader, etc)
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network transmission.
std::map< int, std::string > hostMap_t
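The host_map is a map associating ranks with std::string values (host names); as declared above it is a std::map< int, std::string >, not a map of artdaq::DestinationInfo objects.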
fhicl::Atom< size_t > graph_width
"graph_width": Width of the summary graph