10 #include "canvas/Utilities/Exception.h"
11 #include "cetlib_except/exception.h"
13 #define TRACE_NAME (app_name + "_RoutingMasterCore").c_str() // include these 2 first -
14 #include "artdaq/DAQdata/Globals.hh"
15 #include "artdaq-core/Data/Fragment.hh"
16 #include "artdaq-core/Utilities/ExceptionHandler.hh"
18 #include "artdaq/Application/RoutingMasterCore.hh"
19 #include "artdaq/Application/Routing/makeRoutingMasterPolicy.hh"
29 : received_token_counter_()
30 , shutdown_requested_(false)
31 , stop_requested_(true)
32 , pause_requested_(false)
37 TLOG(TLVL_DEBUG) <<
"Constructor" ;
44 TLOG(TLVL_DEBUG) <<
"Destructor" ;
45 artdaq::StatisticsCollection::getInstance().requestStop();
46 if (ev_token_receive_thread_.joinable()) ev_token_receive_thread_.join();
51 TLOG(TLVL_DEBUG) <<
"initialize method called with "
52 <<
"ParameterSet = \"" << pset.to_string()
56 fhicl::ParameterSet daq_pset;
59 daq_pset = pset.get<fhicl::ParameterSet>(
"daq");
64 <<
"Unable to find the DAQ parameters in the initialization "
65 <<
"ParameterSet: \"" + pset.to_string() +
"\"." ;
69 if (daq_pset.has_key(
"rank"))
71 if (my_rank >= 0 && daq_pset.get<
int>(
"rank") != my_rank) {
72 TLOG(TLVL_WARNING) <<
"Routing Master rank specified at startup is different than rank specified at configure! Using rank received at configure!";
74 my_rank = daq_pset.get<
int>(
"rank");
78 TLOG(TLVL_ERROR) <<
"Routing Master rank not specified at startup or in configuration! Aborting";
84 policy_pset_ = daq_pset.get<fhicl::ParameterSet>(
"policy");
89 <<
"Unable to find the policy parameters in the DAQ initialization ParameterSet: \"" + daq_pset.to_string() +
"\"." ;
94 fhicl::ParameterSet metric_pset;
97 metric_pset = daq_pset.get<fhicl::ParameterSet>(
"metrics");
101 if (metric_pset.is_empty())
103 TLOG(TLVL_INFO) <<
"No metric plugins appear to be defined" ;
107 metricMan->initialize(metric_pset, app_name);
111 ExceptionHandler(ExceptionHandlerRethrow::no,
112 "Error loading metrics in RoutingMasterCore::initialize()");
116 auto policy_plugin_spec = policy_pset_.get<std::string>(
"policy",
"");
117 if (policy_plugin_spec.length() == 0)
120 <<
"No fragment generator (parameter name = \"policy\") was "
121 <<
"specified in the policy ParameterSet. The "
122 <<
"DAQ initialization PSet was \"" << daq_pset.to_string() <<
"\"." ;
131 std::stringstream exception_string;
132 exception_string <<
"Exception thrown during initialization of policy of type \""
133 << policy_plugin_spec <<
"\"";
135 ExceptionHandler(ExceptionHandlerRethrow::no, exception_string.str());
137 TLOG(TLVL_DEBUG) <<
"FHiCL parameter set used to initialize the policy which threw an exception: " << policy_pset_.to_string() ;
142 rt_priority_ = daq_pset.get<
int>(
"rt_priority", 0);
143 sender_ranks_ = daq_pset.get<std::vector<int>>(
"sender_ranks");
144 num_receivers_ = policy_->GetReceiverCount();
146 receive_ack_events_ = std::vector<epoll_event>(sender_ranks_.size());
147 receive_token_events_ = std::vector<epoll_event>(num_receivers_ + 1);
149 auto mode = daq_pset.get<
bool>(
"senders_send_by_send_count",
false);
151 max_table_update_interval_ms_ = daq_pset.get<
size_t>(
"table_update_interval_ms", 1000);
152 current_table_interval_ms_ = max_table_update_interval_ms_;
153 max_ack_cycle_count_ = daq_pset.get<
size_t>(
"table_ack_retry_count", 5);
154 receive_token_port_ = daq_pset.get<
int>(
"routing_token_port", 35555);
155 send_tables_port_ = daq_pset.get<
int>(
"table_update_port", 35556);
156 receive_acks_port_ = daq_pset.get<
int>(
"table_acknowledge_port", 35557);
157 send_tables_address_ = daq_pset.get<std::string>(
"table_update_address",
"227.128.12.28");
158 receive_address_ = daq_pset.get<std::string>(
"routing_master_hostname",
"localhost");
161 statsHelper_.createCollectors(daq_pset, 100, 30.0, 60.0, TABLE_UPDATES_STAT_KEY);
163 shutdown_requested_.store(
false);
164 start_recieve_token_thread_();
171 stop_requested_.store(
false);
172 pause_requested_.store(
false);
174 statsHelper_.resetStatistics();
176 metricMan->do_start();
177 table_update_count_ = 0;
178 received_token_count_ = 0;
180 TLOG(TLVL_INFO) <<
"Started run " << run_id_.run() ;
186 TLOG(TLVL_INFO) <<
"Stopping run " << run_id_.run()
187 <<
" after " << table_update_count_ <<
" table updates."
188 <<
" and " << received_token_count_ <<
" received tokens." ;
189 stop_requested_.store(
true);
190 run_id_ = art::RunID::flushRun();
196 TLOG(TLVL_INFO) <<
"Pausing run " << run_id_.run()
197 <<
" after " << table_update_count_ <<
" table updates."
198 <<
" and " << received_token_count_ <<
" received tokens." ;
199 pause_requested_.store(
true);
205 TLOG(TLVL_DEBUG) <<
"Resuming run " << run_id_.run();
206 pause_requested_.store(
false);
207 metricMan->do_start();
213 shutdown_requested_.store(
true);
214 if (ev_token_receive_thread_.joinable()) ev_token_receive_thread_.join();
215 policy_.reset(
nullptr);
216 metricMan->shutdown();
222 TLOG(TLVL_INFO) <<
"soft_initialize method called with "
223 <<
"ParameterSet = \"" << pset.to_string()
225 return initialize(pset, e, f);
230 TLOG(TLVL_INFO) <<
"reinitialize method called with "
231 <<
"ParameterSet = \"" << pset.to_string()
233 return initialize(pset, e, f);
238 if (rt_priority_ > 0)
240 #pragma GCC diagnostic push
241 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
242 sched_param s_param = {};
243 s_param.sched_priority = rt_priority_;
244 if (pthread_setschedparam(pthread_self(), SCHED_RR, &s_param))
245 TLOG(TLVL_WARNING) <<
"setting realtime priority failed" ;
246 #pragma GCC diagnostic pop
252 if (rt_priority_ > 0)
254 #pragma GCC diagnostic push
255 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
256 sched_param s_param = {};
257 s_param.sched_priority = rt_priority_;
258 int status = pthread_setschedparam(pthread_self(), SCHED_RR, &s_param);
262 <<
"Failed to set realtime priority to " << rt_priority_
263 <<
", return code = " << status ;
265 #pragma GCC diagnostic pop
270 TLOG(TLVL_DEBUG) <<
"Sending initial table." ;
271 auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
272 auto nextSendTime = startTime;
274 while (!stop_requested_ && !pause_requested_)
276 startTime = artdaq::MonitoredQuantity::getCurrentTime();
278 if (startTime >= nextSendTime)
280 auto table = policy_->GetCurrentTable();
281 if (table.size() > 0)
283 send_event_table(table);
284 ++table_update_count_;
285 delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
286 statsHelper_.addSample(TABLE_UPDATES_STAT_KEY, delta_time);
287 TLOG(16) <<
"process_fragments TABLE_UPDATES_STAT_KEY=" << delta_time ;
291 TLOG(TLVL_DEBUG) <<
"No tokens received in this update interval (" << current_table_interval_ms_ <<
" ms)! This most likely means that the receivers are not keeping up!" ;
293 auto max_tokens = policy_->GetMaxNumberOfTokens();
296 auto frac = table.size() /
static_cast<double>(max_tokens);
297 if (frac > 0.75) current_table_interval_ms_ = 9 * current_table_interval_ms_ / 10;
298 if (frac < 0.5) current_table_interval_ms_ = 11 * current_table_interval_ms_ / 10;
299 if (current_table_interval_ms_ > max_table_update_interval_ms_) current_table_interval_ms_ = max_table_update_interval_ms_;
300 if (current_table_interval_ms_ < 1) current_table_interval_ms_ = 1;
302 nextSendTime = startTime + current_table_interval_ms_ / 1000.0;
303 TLOG(TLVL_DEBUG) <<
"current_table_interval_ms is now " << current_table_interval_ms_ ;
307 usleep(current_table_interval_ms_ * 10);
312 metricMan->do_stop();
318 if (table_socket_ == -1)
320 table_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
321 if (table_socket_ < 0)
323 TLOG(TLVL_ERROR) <<
"I failed to create the socket for sending Data Requests! Errno: " << errno ;
326 auto sts =
ResolveHost(send_tables_address_.c_str(), send_tables_port_, send_tables_addr_);
329 TLOG(TLVL_ERROR) <<
"Unable to resolve table_update_address" ;
334 if (receive_address_ !=
"localhost")
336 TLOG(TLVL_DEBUG) <<
"Making sure that multicast sending uses the correct interface for hostname " << receive_address_ ;
341 throw art::Exception(art::errors::Configuration) <<
"RoutingMasterCore: Unable to resolve routing_master_address" << std::endl;;
344 if (setsockopt(table_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
346 throw art::Exception(art::errors::Configuration) <<
347 "RoutingMasterCore: Unable to enable port reuse on table update socket" << std::endl;
351 if (setsockopt(table_socket_, IPPROTO_IP, IP_MULTICAST_LOOP, &yes,
sizeof(yes)) < 0)
353 TLOG(TLVL_ERROR) <<
"Unable to enable multicast loopback on table socket" ;
356 if (setsockopt(table_socket_, IPPROTO_IP, IP_MULTICAST_IF, &addr,
sizeof(addr)) == -1)
358 TLOG(TLVL_ERROR) <<
"Cannot set outgoing interface. Errno: " << errno ;
362 if (setsockopt(table_socket_, SOL_SOCKET, SO_BROADCAST, (
void*)&yes,
sizeof(
int)) == -1)
364 TLOG(TLVL_ERROR) <<
"Cannot set request socket to broadcast. Errno: " << errno ;
370 if (ack_socket_ == -1)
372 ack_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
375 throw art::Exception(art::errors::Configuration) <<
"RoutingMasterCore: Error creating socket for receiving table update acks!" << std::endl;
379 struct sockaddr_in si_me_request;
382 if (setsockopt(ack_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
384 throw art::Exception(art::errors::Configuration) <<
385 "RoutingMasterCore: Unable to enable port reuse on ack socket" << std::endl;
388 memset(&si_me_request, 0,
sizeof(si_me_request));
389 si_me_request.sin_family = AF_INET;
390 si_me_request.sin_port = htons(receive_acks_port_);
391 si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
392 if (bind(ack_socket_, reinterpret_cast<struct sockaddr *>(&si_me_request),
sizeof(si_me_request)) == -1)
394 throw art::Exception(art::errors::Configuration) <<
395 "RoutingMasterCore: Cannot bind request socket to port " << receive_acks_port_ << std::endl;
398 TLOG(TLVL_DEBUG) <<
"Listening for acks on 0.0.0.0 port " << receive_acks_port_ ;
401 auto acks = std::unordered_map<int, bool>();
402 for (
auto& r : sender_ranks_)
407 auto start_time = std::chrono::steady_clock::now();
408 while (std::count_if(acks.begin(), acks.end(), [](std::pair<int, bool> p) {
return !p.second; }) > 0 && !stop_requested_)
414 assert(packetSize +
sizeof(header) < MAX_ROUTING_TABLE_SIZE);
415 std::vector<uint8_t> buffer(packetSize +
sizeof(header));
419 TLOG(TLVL_DEBUG) <<
"Sending table information for " << header.nEntries <<
" events to multicast group " << send_tables_address_ <<
", port " << send_tables_port_ ;
420 TRACE(16,
"headerData:0x%016lx%016lx packetData:0x%016lx%016lx"
421 ,((
unsigned long*)&header)[0],((
unsigned long*)&header)[1], ((
unsigned long*)&packet[0])[0],((
unsigned long*)&packet[0])[1] );
422 auto sts = sendto(table_socket_, &buffer[0], buffer.size(), 0,
reinterpret_cast<struct sockaddr *
>(&send_tables_addr_),
sizeof(send_tables_addr_));
423 if (sts != static_cast<ssize_t>(buffer.size()))
425 TLOG(TLVL_ERROR) <<
"Error sending routing table. sts=" << sts;
430 auto first = packet[0].sequence_id;
431 auto last = packet.rbegin()->sequence_id;
432 TLOG(TLVL_DEBUG) <<
"Sent " << sts <<
" bytes. Expecting acks to have first= " << first <<
", and last= " << last ;
435 auto startTime = std::chrono::steady_clock::now();
436 while (std::count_if(acks.begin(), acks.end(), [](std::pair<int, bool> p) {
return !p.second; }) > 0)
438 auto table_ack_wait_time_ms = current_table_interval_ms_ / max_ack_cycle_count_;
439 if (TimeUtils::GetElapsedTimeMilliseconds(startTime) > table_ack_wait_time_ms)
441 if (counter > max_ack_cycle_count_ && table_update_count_ > 0)
443 TLOG(TLVL_ERROR) <<
"Did not receive acks from all senders after resending table " << counter
444 <<
" times during the table_update_interval. Check the status of the senders!" ;
447 TLOG(TLVL_WARNING) <<
"Did not receive acks from all senders within the timeout (" << table_ack_wait_time_ms <<
" ms). Resending table update" ;
451 TLOG(20) <<
"send_event_table: Polling Request socket for new requests" ;
458 if (errno == EWOULDBLOCK || errno == EAGAIN)
460 TLOG(20) <<
"send_event_table: No more ack datagrams on ack socket." ;
465 TLOG(TLVL_ERROR) <<
"An unexpected error occurred during ack packet receive" ;
471 TLOG(TLVL_DEBUG) <<
"Ack packet from rank " << buffer.
rank <<
" has first= " << buffer.
first_sequence_id
475 TLOG(TLVL_DEBUG) <<
"Received table update acknowledgement from sender with rank " << buffer.
rank <<
"." ;
476 acks[buffer.
rank] =
true;
477 TLOG(TLVL_DEBUG) <<
"There are now " << std::count_if(acks.begin(), acks.end(), [](std::pair<int, bool> p) {
return !p.second; })
478 <<
" acks outstanding" ;
482 if (!acks.count(buffer.
rank))
484 TLOG(TLVL_ERROR) <<
"Received acknowledgement from invalid rank " << buffer.
rank <<
"!"
485 <<
" Cross-talk between RoutingMasters means there's a configuration error!" ;
489 TLOG(TLVL_WARNING) <<
"Received acknowledgement from rank " << buffer.
rank
490 <<
" that had incorrect sequence ID information. Discarding."
491 <<
" Expected first/last=" << first <<
"/"<< last
497 usleep(table_ack_wait_time_ms * 1000 / 10);
502 artdaq::TimeUtils::seconds delta = std::chrono::steady_clock::now() - start_time;
503 metricMan->sendMetric(
"Avg Table Acknowledge Time", delta.count(),
"seconds", 3, MetricMode::Average);
// Token-reception loop. Runs until shutdown_requested_ is set. On each pass it
// makes sure the listening socket and the epoll instance exist, waits for
// epoll events, accepts new receiver connections, reads RoutingToken messages
// from connected receivers and hands the advertised free slots to policy_.
// NOTE(review): several original lines (braces, else-branches, declarations of
// addr / sts) are not visible in this listing; the comments below describe
// only the statements that are shown.
507 void artdaq::RoutingMasterCore::receive_tokens_()
509 while (!shutdown_requested_)
511 TLOG(TLVL_DEBUG) <<
"Receive Token loop start" ;
// token_socket_ == -1 is the marker for "listener not open yet".
512 if (token_socket_ == -1)
514 TLOG(TLVL_DEBUG) <<
"Opening token listener socket" ;
// The listening socket is switched to non-blocking mode.
516 fcntl(token_socket_, F_SETFL, O_NONBLOCK);
// Discard any previous epoll instance and create a fresh one, then register
// the listening socket for readable (EPOLLIN) and priority (EPOLLPRI) events.
518 if (token_epoll_fd_ != -1) close(token_epoll_fd_);
519 struct epoll_event ev;
520 token_epoll_fd_ = epoll_create1(0);
521 ev.events = EPOLLIN | EPOLLPRI;
522 ev.data.fd = token_socket_;
523 if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, token_socket_, &ev) == -1)
525 TLOG(TLVL_ERROR) <<
"Could not register listen socket to epoll fd" ;
// Guard: both descriptors must be valid before waiting on them.
529 if (token_socket_ == -1 || token_epoll_fd_ == -1)
531 TLOG(TLVL_DEBUG) <<
"One of the listen sockets was not opened successfully." ;
// Wait for events; the timeout argument is current_table_interval_ms_.
535 auto nfds = epoll_wait(token_epoll_fd_, &receive_token_events_[0], receive_token_events_.size(), current_table_interval_ms_);
538 perror(
"epoll_wait");
// Loops while a stop is in effect and no shutdown has been requested
// (the loop body is not visible in this listing).
542 while (stop_requested_ && !shutdown_requested_)
547 TLOG(TLVL_DEBUG) <<
"Received " << nfds <<
" events" ;
548 for (
auto n = 0; n < nfds; ++n)
// An event on the listening socket means a new receiver is connecting.
550 if (receive_token_events_[n].data.fd == token_socket_)
552 TLOG(TLVL_DEBUG) <<
"Accepting new connection on token_socket" ;
554 socklen_t arglen =
sizeof(addr);
555 auto conn_sock = accept(token_socket_, (
struct sockaddr *)&addr, &arglen);
// NOTE(review): no check of accept()'s result is visible before this fcntl
// call - confirm it is validated in the lines missing from this listing.
556 fcntl(conn_sock, F_SETFL, O_NONBLOCK);
// Remember the peer's dotted-quad address, keyed by connection fd, for logging.
564 receive_token_addrs_[conn_sock] = std::string(inet_ntoa(addr.sin_addr));
565 TLOG(TLVL_DEBUG) <<
"New fd is " << conn_sock <<
" for receiver at " << receive_token_addrs_[conn_sock];
// The accepted connection is registered with EPOLLIN | EPOLLET.
566 struct epoll_event ev;
567 ev.events = EPOLLIN | EPOLLET;
568 ev.data.fd = conn_sock;
569 if (epoll_ctl(token_epoll_fd_, EPOLL_CTL_ADD, conn_sock, &ev) == -1)
571 perror(
"epoll_ctl: conn_sock");
// Timestamp taken before reading; used for the TOKENS_RECEIVED_STAT_KEY sample below.
586 auto startTime = artdaq::MonitoredQuantity::getCurrentTime();
591 detail::RoutingToken buff;
// NOTE(review): the destination is always &buff while the requested length
// shrinks by sts. If this statement is retried for partial reads, later bytes
// would overwrite the start of buff - confirm against the full source.
592 sts += read(receive_token_events_[n].data.fd, &buff,
sizeof(detail::RoutingToken) - sts);
595 TLOG(TLVL_INFO) <<
"Received 0-size token from " << receive_token_addrs_[receive_token_events_[n].data.fd];
// EAGAIN on the non-blocking socket: nothing more to read from this peer for now.
598 else if(sts < 0 && errno == EAGAIN)
600 TLOG(TLVL_DEBUG) <<
"No more tokens from this rank. Continuing poll loop.";
// Error path: forget the peer address, close the connection, remove it from epoll.
// NOTE(review): EPOLL_CTL_DEL is issued after close() on the same descriptor;
// consider deregistering before closing - confirm intended order.
605 TLOG(TLVL_ERROR) <<
"Error reading from token socket: sts=" << sts <<
", errno=" << errno;
606 receive_token_addrs_.erase(receive_token_events_[n].data.fd);
607 close(receive_token_events_[n].data.fd);
608 epoll_ctl(token_epoll_fd_, EPOLL_CTL_DEL, receive_token_events_[n].data.fd, NULL);
// A full-size message whose header does not match TOKEN_MAGIC is reported as invalid.
611 else if (sts ==
sizeof(detail::RoutingToken) && buff.header != TOKEN_MAGIC)
613 TLOG(TLVL_ERROR) <<
"Received invalid token from " << receive_token_addrs_[receive_token_events_[n].data.fd] <<
" sts=" << sts;
// A full-size message with a valid header: process the token.
616 else if(sts ==
sizeof(detail::RoutingToken))
619 TLOG(TLVL_DEBUG) <<
"Received token from " << buff.rank <<
" indicating " << buff.new_slots_free <<
" slots are free. (run=" << buff.run_number <<
")" ;
// Tokens stamped with another run number are, per the log message, ignored.
620 if (buff.run_number != run_id_.run())
622 TLOG(TLVL_DEBUG) <<
"Received token from a different run number! Current = " << run_id_.run() <<
", token = " << buff.run_number <<
", ignoring (n=" << buff.new_slots_free <<
")";
626 received_token_count_ += buff.new_slots_free;
// Forward the free slots to the policy.
// NOTE(review): the condition selecting between this call and the send-count
// handling below is not visible in this listing.
629 policy_->AddReceiverToken(buff.rank, buff.new_slots_free);
// Send-count handling: tokens are accumulated per rank, and one token is passed
// to the policy for every sender_ranks_.size() tokens collected for that rank.
633 if (!received_token_counter_.count(buff.rank)) received_token_counter_[buff.rank] = 0;
634 received_token_counter_[buff.rank] += buff.new_slots_free;
635 TLOG(TLVL_DEBUG) <<
"RoutingMasterMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] <<
" tokens for rank " << buff.rank <<
" and I need " << sender_ranks_.size() <<
"." ;
636 while (received_token_counter_[buff.rank] >= sender_ranks_.size())
638 TLOG(TLVL_DEBUG) <<
"RoutingMasterMode is RouteBySendCount. I have " << received_token_counter_[buff.rank] <<
" tokens for rank " << buff.rank <<
" and I need " << sender_ranks_.size()
639 <<
"... Sending token to policy" ;
640 policy_->AddReceiverToken(buff.rank, 1);
641 received_token_counter_[buff.rank] -= sender_ranks_.size();
// Record the elapsed time as a TOKENS_RECEIVED_STAT_KEY sample and ask the
// statistics helper whether a report is due.
647 auto delta_time = artdaq::MonitoredQuantity::getCurrentTime() - startTime;
648 statsHelper_.addSample(TOKENS_RECEIVED_STAT_KEY, delta_time);
649 bool readyToReport = statsHelper_.readyToReport(delta_time);
// Log the formatted statistics summary at INFO level.
652 std::string statString = buildStatisticsString_();
653 TLOG(TLVL_INFO) << statString;
// Starts the thread that executes receive_tokens_() on this object.
// A previously started thread that is still joinable is joined first.
661 void artdaq::RoutingMasterCore::start_recieve_token_thread_()
663 if (ev_token_receive_thread_.joinable()) ev_token_receive_thread_.join();
// The new thread is created with an explicit stack size of 4096 * 2000 bytes.
664 boost::thread::attributes attrs;
665 attrs.set_stack_size(4096 * 2000);
667 TLOG(TLVL_INFO) <<
"Starting Token Reception Thread" ;
669 ev_token_receive_thread_ = boost::thread(attrs, boost::bind(&RoutingMasterCore::receive_tokens_,
this));
// A boost::exception raised while creating the thread is reported on std::cerr
// together with the current errno value.
// NOTE(review): the statements following the std::cerr output inside this
// handler are not visible in this listing.
671 catch(boost::exception
const& e)
673 std::cerr <<
"Exception encountered starting Token Reception thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
676 TLOG(TLVL_INFO) <<
"Started Token Reception Thread";
681 std::string resultString;
684 auto tmpString = app_name +
" run number = " + std::to_string(run_id_.run())
685 +
", table updates sent = " + std::to_string(table_update_count_)
686 +
", Receiver tokens received = " + std::to_string(received_token_count_);
// Formats a multi-line, human-readable summary of the table-update and
// token-reception statistics into an ostringstream, starting with a header
// line that carries app_name.
// NOTE(review): the return statement is not visible in this listing.
690 std::string artdaq::RoutingMasterCore::buildStatisticsString_()
const
692 std::ostringstream oss;
693 oss << app_name <<
" statistics:" << std::endl;
// Table-update section: only emitted when the MonitoredQuantity lookup succeeds.
695 auto mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TABLE_UPDATES_STAT_KEY);
696 if (mqPtr.get() !=
nullptr)
698 artdaq::MonitoredQuantityStats stats;
699 mqPtr->getStats(stats);
// NOTE(review): the literal below contains ", , monitor window" (doubled
// comma); it looks like a typo in the emitted text but is left unchanged here.
700 oss <<
" Table Update statistics: "
701 << stats.recentSampleCount <<
" table updates sent at "
702 << stats.recentSampleRate <<
" table updates/sec, , monitor window = "
703 << stats.recentDuration <<
" sec" << std::endl;
704 oss <<
" Average times per table update: ";
// Elapsed time per update is the reciprocal of the recent rate, so it is only
// printed when that rate is positive.
705 if (stats.recentSampleRate > 0.0)
707 oss <<
" elapsed time = "
708 << (1.0 / stats.recentSampleRate) <<
" sec";
// NOTE(review): divides by sender_ranks_.size() - confirm the sender list can
// never be empty when this runs.
710 oss <<
", avg table acknowledgement wait time = "
711 << (mqPtr->getRecentValueSum() / sender_ranks_.size()) <<
" sec" << std::endl;
// Token section: same layout, based on the TOKENS_RECEIVED_STAT_KEY quantity.
714 mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TOKENS_RECEIVED_STAT_KEY);
715 if (mqPtr.get() !=
nullptr)
717 artdaq::MonitoredQuantityStats stats;
718 mqPtr->getStats(stats);
// NOTE(review): same doubled comma (", , monitor window") as in the section above.
719 oss <<
" Received Token statistics: "
720 << stats.recentSampleCount <<
" tokens received at "
721 << stats.recentSampleRate <<
" tokens/sec, , monitor window = "
722 << stats.recentDuration <<
" sec" << std::endl;
723 oss <<
" Average times per token: ";
// As above, the per-token elapsed time is only printed for a positive rate.
724 if (stats.recentSampleRate > 0.0)
726 oss <<
" elapsed time = "
727 << (1.0 / stats.recentSampleRate) <<
" sec";
729 oss <<
", input token wait time = "
730 << mqPtr->getRecentValueSum() <<
" sec" << std::endl;
// Publishes the table-update and token-reception statistics through metricMan.
// Each group of metrics is sent only when the corresponding MonitoredQuantity
// lookup returns a non-null pointer.
736 void artdaq::RoutingMasterCore::sendMetrics_()
// Table-update metrics: full sample count, recent rate, and the recent value
// sum divided by the number of configured sender ranks.
740 auto mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TABLE_UPDATES_STAT_KEY);
741 if (mqPtr.get() !=
nullptr)
743 artdaq::MonitoredQuantityStats stats;
744 mqPtr->getStats(stats);
745 metricMan->sendMetric(
"Table Update Count", static_cast<unsigned long>(stats.fullSampleCount),
"updates", 1, MetricMode::LastPoint);
746 metricMan->sendMetric(
"Table Update Rate", stats.recentSampleRate,
"updates/sec", 1, MetricMode::Average);
// NOTE(review): divides by sender_ranks_.size() - confirm the sender list can
// never be empty when metrics are sent.
747 metricMan->sendMetric(
"Average Sender Acknowledgement Time", (mqPtr->getRecentValueSum() / sender_ranks_.size()),
"seconds", 3, MetricMode::Average);
// Token metrics: full sample count, recent rate, and the recent value sum.
750 mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(TOKENS_RECEIVED_STAT_KEY);
751 if (mqPtr.get() !=
nullptr)
753 artdaq::MonitoredQuantityStats stats;
754 mqPtr->getStats(stats);
755 metricMan->sendMetric(
"Receiver Token Count", static_cast<unsigned long>(stats.fullSampleCount),
"updates", 1, MetricMode::LastPoint);
756 metricMan->sendMetric(
"Receiver Token Rate", stats.recentSampleRate,
"updates/sec", 1, MetricMode::Average);
757 metricMan->sendMetric(
"Total Receiver Token Wait Time", mqPtr->getRecentValueSum(),
"seconds", 3, MetricMode::Average);
bool resume(uint64_t, uint64_t)
Resumes the RoutingMasterCore.
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
bool soft_initialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Soft-Initializes the RoutingMasterCore.
static const std::string TOKENS_RECEIVED_STAT_KEY
Key for the Tokens Received MonitoredQuantity.
A row of the Routing Table.
bool reinitialize(fhicl::ParameterSet const &pset, uint64_t timeout, uint64_t timestamp)
Reinitializes the RoutingMasterCore.
bool start(art::RunID id, uint64_t, uint64_t)
Start the RoutingMasterCore.
A RoutingAckPacket contains the rank of the table receiver, plus the first and last sequence IDs in t...
RoutingMasterCore()
RoutingMasterCore Constructor.
Events should be routed by sequence ID (BR -> EB)
bool initialize(fhicl::ParameterSet const &pset, uint64_t, uint64_t)
Processes the initialize request.
The RoutingToken contains the magic bytes, the rank of the token sender, and the number of slots free...
int TCP_listen_fd(int port, int rcvbuf)
Create a TCP listening socket on the given port and INADDR_ANY, with the given receive buffer...
Fragment::sequence_id_t first_sequence_id
The first sequence ID in the received RoutingPacket.
bool pause(uint64_t, uint64_t)
Pauses the RoutingMasterCore.
Fragment::sequence_id_t last_sequence_id
The last sequence ID in the received RoutingPacket.
std::vector< RoutingPacketEntry > RoutingPacket
A RoutingPacket is simply a vector of RoutingPacketEntry objects. It is not suitable for network tran...
bool stop(uint64_t, uint64_t)
Stops the RoutingMasterCore.
int rank
The rank from which the RoutingAckPacket came.
std::unique_ptr< RoutingMasterPolicy > makeRoutingMasterPolicy(std::string const &policy_plugin_spec, fhicl::ParameterSet const &ps)
Load a RoutingMasterPolicy plugin.
std::string report(std::string const &) const
Send a report on the current status of the RoutingMasterCore.
static const std::string TABLE_UPDATES_STAT_KEY
Key for Table Update count MonitoredQuantity.
bool shutdown(uint64_t)
Shuts Down the RoutingMasterCore.
void process_event_table()
Main loop of the RoutingMasterCore. Determines when to send the next table update, asks the RoutingMasterPolicy for the table to send, and sends it.
void send_event_table(detail::RoutingPacket table)
Sends a detail::RoutingPacket to the table receivers.
Events should be routed by send count (EB -> Agg)