10 #include "canvas/Utilities/Exception.h"
11 #include "cetlib_except/exception.h"
13 #define TRACE_NAME (app_name + "_RoutingMasterCore").c_str() // include these 2 first -
14 #include "artdaq/DAQdata/Globals.hh"
15 #include "artdaq-core/Data/Fragment.hh"
16 #include "artdaq-core/Utilities/ExceptionHandler.hh"
18 #include "artdaq/Application/RoutingMasterCore.hh"
19 #include "artdaq/Application/Routing/makeRoutingMasterPolicy.hh"
29 : received_token_counter_()
30 , shutdown_requested_(false)
31 , stop_requested_(false)
32 , pause_requested_(false)
37 TLOG(TLVL_DEBUG) <<
"Constructor" ;
44 TLOG(TLVL_DEBUG) <<
"Destructor" ;
45 if (ev_token_receive_thread_.joinable()) ev_token_receive_thread_.join();
50 TLOG(TLVL_DEBUG) <<
"initialize method called with "
51 <<
"ParameterSet = \"" << pset.to_string()
55 fhicl::ParameterSet daq_pset;
58 daq_pset = pset.get<fhicl::ParameterSet>(
"daq");
63 <<
"Unable to find the DAQ parameters in the initialization "
64 <<
"ParameterSet: \"" + pset.to_string() +
"\"." ;
68 if (daq_pset.has_key(
"rank"))
70 if (my_rank >= 0 && daq_pset.get<
int>(
"rank") != my_rank) {
71 TLOG(TLVL_WARNING) <<
"Routing Master rank specified at startup is different than rank specified at configure! Using rank received at configure!";
73 my_rank = daq_pset.get<
int>(
"rank");
77 TLOG(TLVL_ERROR) <<
"Routing Master rank not specified at startup or in configuration! Aborting";
83 policy_pset_ = daq_pset.get<fhicl::ParameterSet>(
"policy");
88 <<
"Unable to find the policy parameters in the DAQ initialization ParameterSet: \"" + daq_pset.to_string() +
"\"." ;
93 fhicl::ParameterSet metric_pset;
96 metric_pset = daq_pset.get<fhicl::ParameterSet>(
"metrics");
100 if (metric_pset.is_empty())
102 TLOG(TLVL_INFO) <<
"No metric plugins appear to be defined" ;
106 metricMan->initialize(metric_pset, app_name);
110 ExceptionHandler(ExceptionHandlerRethrow::no,
111 "Error loading metrics in RoutingMasterCore::initialize()");
115 auto policy_plugin_spec = policy_pset_.get<std::string>(
"policy",
"");
116 if (policy_plugin_spec.length() == 0)
119 <<
"No fragment generator (parameter name = \"policy\") was "
120 <<
"specified in the policy ParameterSet. The "
121 <<
"DAQ initialization PSet was \"" << daq_pset.to_string() <<
"\"." ;
130 std::stringstream exception_string;
131 exception_string <<
"Exception thrown during initialization of policy of type \""
132 << policy_plugin_spec <<
"\"";
134 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
136 TLOG(TLVL_DEBUG) <<
"FHiCL parameter set used to initialize the policy which threw an exception: " << policy_pset_.to_string() ;
141 rt_priority_ = daq_pset.get<
int>(
"rt_priority", 0);
142 sender_ranks_ = daq_pset.get<std::vector<int>>(
"sender_ranks");
143 num_receivers_ = policy_->GetReceiverCount();
145 receive_ack_events_ = std::vector<epoll_event>(sender_ranks_.size());
146 receive_token_events_ = std::vector<epoll_event>(num_receivers_ + 1);
148 auto mode = daq_pset.get<
bool>(
"senders_send_by_send_count",
false);
150 max_table_update_interval_ms_ = daq_pset.get<
size_t>(
"table_update_interval_ms", 1000);
151 current_table_interval_ms_ = max_table_update_interval_ms_;
152 max_ack_cycle_count_ = daq_pset.get<
size_t>(
"table_ack_retry_count", 5);
153 receive_token_port_ = daq_pset.get<
int>(
"routing_token_port", 35555);
154 send_tables_port_ = daq_pset.get<
int>(
"table_update_port", 35556);
155 receive_acks_port_ = daq_pset.get<
int>(
"table_acknowledge_port", 35557);
156 send_tables_address_ = daq_pset.get<std::string>(
"table_update_address",
"227.128.12.28");
157 receive_address_ = daq_pset.get<std::string>(
"routing_master_hostname",
"localhost");
160 statsHelper_.createCollectors(daq_pset, 100, 30.0, 60.0, TABLE_UPDATES_STAT_KEY);
162 shutdown_requested_.store(
false);
163 start_recieve_token_thread_();
169 stop_requested_.store(
false);
170 pause_requested_.store(
false);
172 statsHelper_.resetStatistics();
175 metricMan->do_start();
177 table_update_count_ = 0;
178 received_token_count_ = 0;
180 TLOG(TLVL_INFO) <<
"Started run " << run_id_.run() ;
186 TLOG(TLVL_INFO) <<
"Stopping run " << run_id_.run()
187 <<
" after " << table_update_count_ <<
" table updates."
188 <<
" and " << received_token_count_ <<
" received tokens." ;
189 stop_requested_.store(
true);
195 TLOG(TLVL_INFO) <<
"Pausing run " << run_id_.run()
196 <<
" after " << table_update_count_ <<
" table updates."
197 <<
" and " << received_token_count_ <<
" received tokens." ;
198 pause_requested_.store(
true);
204 TLOG(TLVL_INFO) <<
"Resuming run " << run_id_.run() ;
206 pause_requested_.store(
false);
207 metricMan->do_start();
213 shutdown_requested_.store(
true);
214 if (ev_token_receive_thread_.joinable()) ev_token_receive_thread_.join();
215 policy_.reset(
nullptr);
216 metricMan->shutdown();
222 TLOG(TLVL_INFO) <<
"soft_initialize method called with "
223 <<
"ParameterSet = \"" << pset.to_string()
225 return initialize(pset, e, f);
230 TLOG(TLVL_INFO) <<
"reinitialize method called with "
231 <<
"ParameterSet = \"" << pset.to_string()
233 return initialize(pset, e, f);
238 if (rt_priority_ > 0)
240 #pragma GCC diagnostic push
241 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
242 sched_param s_param = {};
243 s_param.sched_priority = rt_priority_;
244 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
245 TLOG(TLVL_WARNING) <<
"setting realtime priority failed" ;
246 #pragma GCC diagnostic pop
252 if (rt_priority_ > 0)
254 #pragma GCC diagnostic push
255 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
256 sched_param s_param = {};
257 s_param.sched_priority = rt_priority_;
258 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
262 <<
"Failed to set realtime priority to " << rt_priority_
263 <<
", return code = " << status ;
265 #pragma GCC diagnostic pop
270 TLOG(TLVL_DEBUG) <<
"Sending initial table." ;
271 auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
272 auto nextSendTime = startTime;
274 while (!stop_requested_ && !pause_requested_)
276 startTime = artdaq::MonitoredQuantity::getCurrentTime();
278 if (startTime >= nextSendTime)
280 auto table = policy_->GetCurrentTable();
281 if (table.size() > 0)
283 send_event_table(table);
284 ++table_update_count_;
285 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
286 statsHelper_.addSample(TABLE_UPDATES_STAT_KEY, delta_time);
287 TLOG(16) <<
"process_fragments TABLE_UPDATES_STAT_KEY=" << delta_time ;
291 TLOG(TLVL_DEBUG) <<
"No tokens received in this update interval (" << current_table_interval_ms_ <<
" ms)! This most likely means that the receivers are not keeping up!" ;
293 auto max_tokens = policy_->GetMaxNumberOfTokens();
296 auto frac = table.size() /
static_cast<double>(max_tokens);
297 if (frac > 0.75) current_table_interval_ms_ = 9 * current_table_interval_ms_ / 10;
298 if (frac < 0.5) current_table_interval_ms_ = 11 * current_table_interval_ms_ / 10;
299 if (current_table_interval_ms_ > max_table_update_interval_ms_) current_table_interval_ms_ = max_table_update_interval_ms_;
300 if (current_table_interval_ms_ < 1) current_table_interval_ms_ = 1;
302 nextSendTime = startTime + current_table_interval_ms_ / 1000.0;
303 TLOG(TLVL_DEBUG) <<
"current_table_interval_ms is now " << current_table_interval_ms_ ;
307 usleep(current_table_interval_ms_ * 10);
311 metricMan->do_stop();
317 if (table_socket_ == -1)
319 table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
320 if (table_socket_ < 0)
322 TLOG(TLVL_ERROR) <<
"I failed to create the socket for sending Data Requests! Errno: " << errno ;
325 auto sts =
ResolveHost(send_tables_address_.c_str(), send_tables_port_, send_tables_addr_);
328 TLOG(TLVL_ERROR) <<
"Unable to resolve table_update_address" ;
333 if (receive_address_ !=
"localhost")
335 TLOG(TLVL_DEBUG) <<
"Making sure that multicast sending uses the correct interface for hostname " << receive_address_ ;
340 throw art::Exception(art::errors::Configuration) <<
"RoutingMasterCore: Unable to resolve routing_master_address" << std::endl;;
343 if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
345 throw art::Exception(art::errors::Configuration) <<
346 "RoutingMasterCore: Unable to enable port reuse on table update socket" << std::endl;
350 if (setsockopt(table_socket_, IPPROTO_IP, IP_MULTICAST_LOOP, &yes,
sizeof(yes)) < 0)
352 TLOG(TLVL_ERROR) <<
"Unable to enable multicast loopback on table socket" ;
355 if (setsockopt(table_socket_, IPPROTO_IP, IP_MULTICAST_IF, &addr,
sizeof(addr)) == -1)
357 TLOG(TLVL_ERROR) <<
"Cannot set outgoing interface. Errno: " << errno ;
361 if (setsockopt(table_socket_, SOL_SOCKET, SO_BROADCAST, (
void*)&yes,
sizeof(
int)) == -1)
363 TLOG(TLVL_ERROR) <<
"Cannot set request socket to broadcast. Errno: " << errno ;
369 if (ack_socket_ == -1)
371 ack_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
374 throw art::Exception(art::errors::Configuration) <<
"RoutingMasterCore: Error creating socket for receiving table update acks!" << std::endl;
378 struct sockaddr_in si_me_request;
381 if (setsockopt(ack_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
383 throw art::Exception(art::errors::Configuration) <<
384 "RoutingMasterCore: Unable to enable port reuse on ack socket" << std::endl;
387 memset(&si_me_request, 0,
sizeof(si_me_request));
388 si_me_request.sin_family = AF_INET;
389 si_me_request.sin_port = htons(receive_acks_port_);
390 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
391 if (bind(ack_socket_, reinterpret_cast<struct sockaddr *>(&si_me_request),
sizeof(si_me_request)) == -1)
393 throw art::Exception(art::errors::Configuration) <<
394 "RoutingMasterCore: Cannot bind request socket to port " << receive_acks_port_ << std::endl;
397 TLOG(TLVL_DEBUG) <<
"Listening for acks on 0.0.0.0 port " << receive_acks_port_ ;
400 auto acks = std::unordered_map<int, bool>();
401 for (
auto& r : sender_ranks_)
406 auto start_time = std::chrono::steady_clock::now();
407 while (std::count_if(acks.begin(), acks.end(), [](std::pair<int, bool> p) {
return !p.second; }) > 0 && !stop_requested_)
413 TLOG(TLVL_DEBUG) <<
"Sending table information for " << header.
nEntries <<
" events to multicast group " << send_tables_address_ <<
", port " << send_tables_port_ ;
414 TRACE(16,
"headerData:0x%016lx%016lx packetData:0x%016lx%016lx"
415 ,((
unsigned long*)&header)[0],((
unsigned long*)&header)[1], ((
unsigned long*)&packet[0])[0],((
unsigned long*)&packet[0])[1] );
416 auto hdrsts = sendto(table_socket_, &header,
sizeof(
detail::RoutingPacketHeader), 0, reinterpret_cast<struct sockaddr *>(&send_tables_addr_),
sizeof(send_tables_addr_));
419 TLOG(TLVL_ERROR) <<
"Error sending routing message header. hdrsts=" << hdrsts;
421 auto pktsts = sendto(table_socket_, &packet[0], packetSize, 0, reinterpret_cast<struct sockaddr *>(&send_tables_addr_),
sizeof(send_tables_addr_));
422 if (pktsts != (ssize_t)packetSize)
424 TLOG(TLVL_ERROR) <<
"Error sending routing message data. hdrsts="<<hdrsts<<
" pktsts="<<pktsts;
429 auto first = packet[0].sequence_id;
430 auto last = packet.rbegin()->sequence_id;
431 TLOG(TLVL_DEBUG) <<
"Sent " << hdrsts <<
"+"<< pktsts <<
". Expecting acks to have first= " << first <<
", and last= " << last ;
434 auto startTime = std::chrono::steady_clock::now();
435 while (std::count_if(acks.begin(), acks.end(), [](std::pair<int, bool> p) {
return !p.second; }) > 0)
437 auto table_ack_wait_time_ms = current_table_interval_ms_ / max_ack_cycle_count_;
438 if (TimeUtils::GetElapsedTimeMilliseconds(startTime) > table_ack_wait_time_ms)
440 if (counter > max_ack_cycle_count_ && table_update_count_ > 0)
442 TLOG(TLVL_ERROR) <<
"Did not receive acks from all senders after resending table " << counter
443 <<
" times during the table_update_interval. Check the status of the senders!" ;
446 TLOG(TLVL_WARNING) <<
"Did not receive acks from all senders within the timeout (" << table_ack_wait_time_ms <<
" ms). Resending table update" ;
450 TLOG(20) <<
"send_event_table: Polling Request socket for new requests" ;
457 if (errno == EWOULDBLOCK || errno == EAGAIN)
459 TLOG(20) <<
"send_event_table: No more ack datagrams on ack socket." ;
464 TLOG(TLVL_ERROR) <<
"An unexpected error occurred during ack packet receive" ;
470 TLOG(TLVL_DEBUG) <<
"Ack packet from rank " << buffer.
rank <<
" has first= " << buffer.
first_sequence_id
474 TLOG(TLVL_DEBUG) <<
"Received table update acknowledgement from sender with rank " << buffer.
rank <<
"." ;
475 acks[buffer.
rank] =
true;
476 TLOG(TLVL_DEBUG) <<
"There are now " << std::count_if(acks.begin(), acks.end(), [](std::pair<int, bool> p) {
return !p.second; })
477 <<
" acks outstanding" ;
481 if (!acks.count(buffer.
rank))
483 TLOG(TLVL_ERROR) <<
"Received acknowledgement from invalid rank " << buffer.
rank <<
"!"
484 <<
" Cross-talk between RoutingMasters means there's a configuration error!" ;
488 TLOG(TLVL_WARNING) <<
"Received acknowledgement from rank " << buffer.
rank
489 <<
" that had incorrect sequence ID information. Discarding." ;
494 usleep(table_ack_wait_time_ms * 1000 / 10);
499 artdaq::TimeUtils::seconds delta = std::chrono::steady_clock::now() - start_time;
500 metricMan->sendMetric(
"Avg Table Acknowledge Time", delta.count(),
"seconds", 3, MetricMode::Average);
// Token-reception loop (launched on its own thread by start_recieve_token_thread_).
// Runs until shutdown_requested_ is set: keeps a listening socket and an epoll
// instance available, accepts receiver connections, reads RoutingToken messages
// from them, and hands the reported free-slot counts to policy_.
504 void artdaq::RoutingMasterCore::receive_tokens_()
506 while (!shutdown_requested_)
508 TLOG(TLVL_DEBUG) <<
"Receive Token loop start" ;
// No listening socket yet (token_socket_ == -1): set one up.
// NOTE(review): the statement that actually opens the socket is not visible here;
// presumably it assigns token_socket_ before the fcntl below -- confirm.
509 if (token_socket_ == -1)
511 TLOG(TLVL_DEBUG) <<
"Opening token listener socket" ;
// Switch the listening socket to non-blocking mode.
513 fcntl(token_socket_, F_SETFL, O_NONBLOCK);
// Discard any previous epoll instance, create a fresh one, and register the
// listening socket for readable (EPOLLIN) and urgent (EPOLLPRI) events.
515 if (token_epoll_fd_ != -1) close(token_epoll_fd_);
516 struct epoll_event ev;
517 token_epoll_fd_ = epoll_create1(0);
518 ev.events = EPOLLIN | EPOLLPRI;
519 ev.data.fd = token_socket_;
520 if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, token_socket_, &ev) == -1)
522 TLOG(TLVL_ERROR) <<
"Could not register listen socket to epoll fd" ;
// Guard: either descriptor is still invalid, so there is nothing to poll.
526 if (token_socket_ == -1 || token_epoll_fd_ == -1)
528 TLOG(TLVL_DEBUG) <<
"One of the listen sockets was not opened successfully." ;
// Wait for activity on the registered descriptors; the timeout argument
// (milliseconds) is the current table update interval.
532 auto nfds = epoll_wait(token_epoll_fd_, &receive_token_events_[0], receive_token_events_.size(), current_table_interval_ms_);
// Reports the epoll_wait error via perror (the guarding condition is not visible here).
535 perror(
"epoll_wait");
539 TLOG(TLVL_DEBUG) <<
"Received " << nfds <<
" events" ;
// Handle each ready descriptor returned by epoll_wait.
540 for (
auto n = 0; n < nfds; ++n)
// Event on the listening socket: a new connection is pending.
542 if (receive_token_events_[n].data.fd == token_socket_)
544 TLOG(TLVL_DEBUG) <<
"Accepting new connection on token_socket" ;
546 socklen_t arglen =
sizeof(addr);
547 auto conn_sock = accept(token_socket_, (
struct sockaddr *)&addr, &arglen);
// The accepted connection is made non-blocking as well.
548 fcntl(conn_sock, F_SETFL, O_NONBLOCK);
// Remember the peer's address string, keyed by the connection's fd, for log messages.
556 receive_token_addrs_[conn_sock] = std::string(inet_ntoa(addr.sin_addr));
557 TLOG(TLVL_DEBUG) <<
"New fd is " << conn_sock <<
" for receiver at " << receive_token_addrs_[conn_sock];
// Register the new connection with epoll for edge-triggered (EPOLLET) read events.
558 struct epoll_event ev;
559 ev.events = EPOLLIN | EPOLLET;
560 ev.data.fd = conn_sock;
561 if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, conn_sock, &ev) == -1)
563 perror(
"epoll_ctl: conn_sock");
// Timestamp used below to measure how long handling this event took.
578 auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
583 detail::RoutingToken buff;
// NOTE(review): sts accumulates the byte count and shrinks the requested length,
// but every read() writes to &buff (offset 0) rather than to &buff + sts. If this
// sits inside a partial-read retry loop (not visible here), later chunks would
// overwrite the start of buff -- confirm.
584 sts += read(receive_token_events_[n].data.fd, &buff,
sizeof(detail::RoutingToken) - sts);
587 TLOG(TLVL_INFO) <<
"Received 0-size token from " << receive_token_addrs_[receive_token_events_[n].data.fd];
// Non-blocking read with nothing pending: not an error, just no more data.
590 else if(sts < 0 && errno == EAGAIN)
592 TLOG(TLVL_DEBUG) <<
"No more tokens from this rank. Continuing poll loop.";
// Read error: forget the peer's address, close the connection, and remove it from epoll.
// NOTE(review): epoll_ctl(EPOLL_CTL_DEL) is issued after close() on the same fd,
// i.e. on an already-closed descriptor -- confirm the ordering is intended.
597 TLOG(TLVL_ERROR) <<
"Error reading from token socket: sts=" << sts <<
", errno=" << errno;
598 receive_token_addrs_.erase(receive_token_events_[n].data.fd);
599 close(receive_token_events_[n].data.fd);
600 epoll_ctl(token_epoll_fd_, EPOLL_CTL_DEL, receive_token_events_[n].data.fd, NULL);
// Full-size message whose header does not match TOKEN_MAGIC: reject it.
603 else if (sts ==
sizeof(detail::RoutingToken) && buff.header != TOKEN_MAGIC)
605 TLOG(TLVL_ERROR) <<
"Received invalid token from " << receive_token_addrs_[receive_token_events_[n].data.fd] <<
" sts=" << sts;
// Full-size message with a valid header: account for it and pass it on.
608 else if(sts ==
sizeof(detail::RoutingToken))
611 TLOG(TLVL_DEBUG) <<
"Received token from " << buff.rank <<
" indicating " << buff.new_slots_free <<
" slots are free." ;
612 received_token_count_ += buff.new_slots_free;
// Hand the free-slot count for this rank straight to the policy.
// NOTE(review): the condition selecting between this call and the
// send-count path below is not visible here.
615 policy_->AddReceiverToken(buff.rank, buff.new_slots_free);
// Send-count path (per the log text): accumulate free slots per rank and give the
// policy one token for every sender_ranks_.size() slots accumulated.
619 if (!received_token_counter_.count(buff.rank)) received_token_counter_[buff.rank] = 0;
620 received_token_counter_[buff.rank] += buff.new_slots_free;
621 TLOG(TLVL_DEBUG) <<
"RoutingMasterMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] <<
" tokens for rank " << buff.rank <<
" and I need " << sender_ranks_.size() <<
"." ;
// NOTE(review): if sender_ranks_ is empty, the comparison is against 0 and the
// subtraction removes 0, so this loop would not terminate -- verify that an
// empty sender_ranks list is rejected elsewhere.
622 while (received_token_counter_[buff.rank] >= sender_ranks_.size())
624 TLOG(TLVL_DEBUG) <<
"RoutingMasterMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] <<
" tokens for rank " << buff.rank <<
" and I need " << sender_ranks_.size()
625 <<
"... Sending token to policy" ;
626 policy_->AddReceiverToken(buff.rank, 1);
627 received_token_counter_[buff.rank] -= sender_ranks_.size();
// Record the time spent handling this event under TOKENS_RECEIVED_STAT_KEY and
// ask the statistics helper whether a report is due.
632 auto delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
633 statsHelper_.addSample(TOKENS_RECEIVED_STAT_KEY, delta_time);
634 bool readyToReport = statsHelper_.readyToReport(delta_time);
// Log the formatted statistics (presumably only when readyToReport is true;
// the guarding condition is not visible here).
637 std::string statString = buildStatisticsString_();
638 TLOG(TLVL_INFO) << statString;
// Starts the thread that runs receive_tokens_().
// Any previously started, still-joinable token thread is joined first, so this
// call blocks until that thread has finished.
646 void artdaq::RoutingMasterCore::start_recieve_token_thread_()
648 if (ev_token_receive_thread_.joinable()) ev_token_receive_thread_.join();
// Request an explicit stack size for the new thread: 4096 * 2000 = 8,192,000 bytes.
649 boost::thread::attributes attrs;
650 attrs.set_stack_size(4096 * 2000);
652 TLOG(TLVL_INFO) <<
"Starting Token Reception Thread" ;
// Launch receive_tokens_ bound to this object.
654 ev_token_receive_thread_ = boost::thread(attrs, boost::bind(&RoutingMasterCore::receive_tokens_,
this));
// Thread-creation failures surfaced as boost::exception are written to std::cerr
// together with the current errno.
// NOTE(review): whether the handler exits or rethrows afterwards is not visible here.
656 catch(boost::exception
const& e)
658 std::cerr <<
"Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
661 TLOG(TLVL_INFO) <<
"Started Token Reception Thread";
666 std::string resultString;
669 auto tmpString = app_name +
" run number = " + std::to_string(run_id_.run())
670 +
", table updates sent = " + std::to_string(table_update_count_)
671 +
", Receiver tokens received = " + std::to_string(received_token_count_);
// Formats a human-readable, multi-line statistics summary into an ostringstream:
// one section for the TABLE_UPDATES_STAT_KEY monitored quantity and one for
// TOKENS_RECEIVED_STAT_KEY. A section is emitted only if the corresponding
// monitored quantity is registered (non-null pointer).
675 std::string artdaq::RoutingMasterCore::buildStatisticsString_()
const
677 std::ostringstream oss;
678 oss << app_name <<
" statistics:" << std::endl;
// --- Table update statistics ---
680 auto mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TABLE_UPDATES_STAT_KEY);
681 if (mqPtr.get() !=
nullptr)
683 artdaq::MonitoredQuantityStats stats;
684 mqPtr->getStats(stats);
// NOTE(review): the emitted text contains a doubled separator (", ,") before "monitor window".
685 oss <<
" Table Update statistics: "
686 << stats.recentSampleCount <<
" table updates sent at "
687 << stats.recentSampleRate <<
" table updates/sec, , monitor window = "
688 << stats.recentDuration <<
" sec" << std::endl;
689 oss <<
" Average times per table update: ";
// The per-update elapsed time is the reciprocal of the rate, so it is only
// printed when the rate is positive.
690 if (stats.recentSampleRate > 0.0)
692 oss <<
" elapsed time = "
693 << (1.0 / stats.recentSampleRate) <<
" sec";
// Reported acknowledgement wait time is the recent value sum divided by the
// number of configured sender ranks.
695 oss <<
", avg table acknowledgement wait time = "
696 << (mqPtr->getRecentValueSum() / sender_ranks_.size()) <<
" sec" << std::endl;
// --- Received token statistics (mqPtr is reused for the second quantity) ---
699 mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TOKENS_RECEIVED_STAT_KEY);
700 if (mqPtr.get() !=
nullptr)
702 artdaq::MonitoredQuantityStats stats;
703 mqPtr->getStats(stats);
704 oss <<
" Received Token statistics: "
705 << stats.recentSampleCount <<
" tokens received at "
706 << stats.recentSampleRate <<
" tokens/sec, , monitor window = "
707 << stats.recentDuration <<
" sec" << std::endl;
708 oss <<
" Average times per token: ";
// As above: reciprocal of the rate, printed only for a positive rate.
709 if (stats.recentSampleRate > 0.0)
711 oss <<
" elapsed time = "
712 << (1.0 / stats.recentSampleRate) <<
" sec";
// Unlike the table-update section, the token wait time is the raw recent value sum.
714 oss <<
", input token wait time = "
715 << mqPtr->getRecentValueSum() <<
" sec" << std::endl;
// Publishes routing statistics through metricMan. For each of the two monitored
// quantities (TABLE_UPDATES_STAT_KEY, TOKENS_RECEIVED_STAT_KEY) that is registered,
// three metrics are sent: a total count (LastPoint mode), a recent rate (Average
// mode) and a time value in seconds (Average mode).
721 void artdaq::RoutingMasterCore::sendMetrics_()
// --- Table update metrics ---
723 auto mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TABLE_UPDATES_STAT_KEY);
724 if (mqPtr.get() !=
nullptr)
726 artdaq::MonitoredQuantityStats stats;
727 mqPtr->getStats(stats);
728 metricMan->sendMetric(
"Table Update Count", static_cast<unsigned long>(stats.fullSampleCount),
"updates", 1, MetricMode::LastPoint);
729 metricMan->sendMetric(
"Table Update Rate", stats.recentSampleRate,
"updates/sec", 1, MetricMode::Average);
// Acknowledgement time is the recent value sum divided by the number of configured sender ranks.
730 metricMan->sendMetric(
"Average Sender Acknowledgement Time", (mqPtr->getRecentValueSum() / sender_ranks_.size()),
"seconds", 3, MetricMode::Average);
// --- Received token metrics (mqPtr is reused for the second quantity) ---
733 mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TOKENS_RECEIVED_STAT_KEY);
734 if (mqPtr.get() !=
nullptr)
736 artdaq::MonitoredQuantityStats stats;
737 mqPtr->getStats(stats);
738 metricMan->sendMetric(
"Receiver Token Count", static_cast<unsigned long>(stats.fullSampleCount),
"updates", 1, MetricMode::LastPoint);
739 metricMan->sendMetric(
"Receiver Token Rate", stats.recentSampleRate,
"updates/sec", 1, MetricMode::Average);
// Here the raw recent value sum is reported, without dividing by a count.
740 metricMan->sendMetric(
"Total Receiver Token Wait Time", mqPtr->getRecentValueSum(),
"seconds", 3, MetricMode::Average);
bool resume(uint64_t, uint64_t)
Resumes the RoutingMasterCore.
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Soft-Initializes the RoutingMasterCore.
static const std::string TOKENS_RECEIVED_STAT_KEY
Key for the Tokens Received MonitoredQuantity.
A row of the Routing Table.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Reinitializes the RoutingMasterCore.
bool start(art::RunID id, uint64_t, uint64_t)
Start the RoutingMasterCore.
A RoutingAckPacket contains the rank of the table receiver, plus the first and last sequence IDs in t...
RoutingMasterCore()
RoutingMasterCore Constructor.
Events should be routed by sequence ID (BR -> EB)
bool initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Processes the initialize request.
The RoutingToken contains the magic bytes, the rank of the token sender, and the number of slots free...
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
Fragment::sequence_id_t first_sequence_id
The first sequence ID in the received RoutingPacket.
bool pause(uint64_t, uint64_t)
Pauses the RoutingMasterCore.
Fragment::sequence_id_t last_sequence_id
The last sequence ID in the received RoutingPacket.
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
bool stop(uint64_t, uint64_t)
Stops the RoutingMasterCore.
int rank
The rank from which the RoutingAckPacket came.
std::unique_ptr< RoutingMasterPolicy > makeRoutingMasterPolicy(std::string const &policy_plugin_spec, fhicl::ParameterSet const &ps)
Load a RoutingMasterPolicy plugin.
std::string report(std::string const &) const
Send a report on the current status of the RoutingMasterCore.
static const std::string TABLE_UPDATES_STAT_KEY
Key for Table Update count MonitoredQuantity.
bool shutdown(uint64_t)
Shuts Down the RoutingMasterCore.
void process_event_table()
Main loop of the RoutingMasterCore. Determines when to send the next table update, asks the RoutingMasterPolicy for the table to send, and sends it.
void send_event_table(detail::RoutingPacket table)
Sends a detail::RoutingPacket to the table receivers.
Events should be routed by send count (EB -> Agg)