9 #define TRACE_NAME "MetricManager"
11 #include "artdaq-utilities/Plugins/MetricManager.hh"
12 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
13 #include "fhiclcpp/ParameterSet.h"
16 #include <boost/exception/all.hpp>
20 , metric_send_interval_ms_(15000)
24 , missed_metric_calls_(0)
25 , metric_queue_max_size_(1000)
26 , metric_queue_notify_size_(10)
41 TLOG(TLVL_INFO) <<
"Configuring metrics with parameter set:\n" << pset.to_string() ;
43 std::vector<std::string> names = pset.get_pset_names();
45 for (
auto name : names)
47 if (name ==
"metric_queue_size")
49 metric_queue_max_size_ = pset.get<
size_t>(
"metric_queue_size");
51 else if (name ==
"metric_queue_notify_size")
53 metric_queue_notify_size_ = pset.get<
size_t>(
"metric_queue_notify_size");
55 else if (name ==
"metric_send_maximum_delay_ms")
57 metric_send_interval_ms_ = pset.get<
int>(
"metric_send_maximum_delay_ms");
63 TLOG(TLVL_DEBUG) <<
"Constructing metric plugin with name " << name ;
64 fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
66 plugin_pset.get<std::string>(
"metricPluginType",
""), plugin_pset));
68 catch (
const cet::exception& e)
70 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
71 ", cet::exception object caught:" << e.explain_self() ;
73 catch (
const boost::exception& e)
75 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
76 ", boost::exception object caught: " << boost::diagnostic_information(e) ;
78 catch (
const std::exception& e)
80 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
81 ", std::exception caught: " << e.what() ;
85 TLOG(TLVL_ERROR) <<
"Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name ;
97 TLOG(TLVL_DEBUG) <<
"Starting MetricManager" ;
98 for (
auto& metric : metric_plugins_)
102 metric->startMetrics();
103 TLOG(TLVL_INFO) <<
"Metric Plugin " << metric->getLibName() <<
" started." ;
109 "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
110 metric->getLibName() ;
120 TLOG(TLVL_DEBUG) <<
"Stopping Metrics" ;
122 metric_cv_.notify_all();
123 TLOG(TLVL_DEBUG) <<
"Joining Metric-Sending thread" ;
124 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
125 TLOG(TLVL_DEBUG) <<
"do_stop Complete" ;
134 initialize(pset, prefix);
139 TLOG(TLVL_DEBUG) <<
"MetricManager is shutting down..." ;
144 for (
auto& i : metric_plugins_)
148 std::string name = i->getLibName();
150 TLOG(TLVL_DEBUG) <<
"Metric Plugin " << name <<
" shutdown." ;
155 "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
159 initialized_ =
false;
165 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
166 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
169 if (!metric_queue_.count(name)) {
170 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
172 auto entry = &(metric_queue_[name]);
174 auto size = entry->first;
175 if (size < metric_queue_max_size_)
177 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
178 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
180 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
182 entry->second.emplace_back(std::move(metric));
187 TLOG(10) <<
"Rejecting metric because queue full" ;
188 missed_metric_calls_++;
190 metric_cv_.notify_all();
196 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
197 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
200 if (!metric_queue_.count(name)) {
201 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
203 auto entry = &(metric_queue_[name]);
205 auto size = entry->first;
206 if (size < metric_queue_max_size_)
208 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
209 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
211 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
213 entry->second.emplace_back(std::move(metric));
218 TLOG(10) <<
"Rejecting metric because queue full" ;
219 missed_metric_calls_++;
221 metric_cv_.notify_all();
227 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
228 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
231 if (!metric_queue_.count(name)) {
232 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
234 auto entry = &(metric_queue_[name]);
236 auto size = entry->first;
237 if (size < metric_queue_max_size_)
239 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
240 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
242 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
244 entry->second.emplace_back(std::move(metric));
249 TLOG(10) <<
"Rejecting metric because queue full" ;
250 missed_metric_calls_++;
252 metric_cv_.notify_all();
258 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
259 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
262 if (!metric_queue_.count(name)) {
263 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
265 auto entry = &(metric_queue_[name]);
267 auto size = entry->first;
268 if (size < metric_queue_max_size_)
270 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
271 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
273 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
275 entry->second.emplace_back(std::move(metric));
280 TLOG(10) <<
"Rejecting metric because queue full" ;
281 missed_metric_calls_++;
283 metric_cv_.notify_all();
289 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
290 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
293 if (!metric_queue_.count(name)) {
294 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
296 auto entry = &(metric_queue_[name]);
298 auto size = entry->first;
299 if (size < metric_queue_max_size_)
301 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
302 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
304 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
306 entry->second.emplace_back(std::move(metric));
311 TLOG(10) <<
"Rejecting metric because queue full" ;
312 missed_metric_calls_++;
314 metric_cv_.notify_all();
// Starts the background thread that runs sendMetricLoop_().
// If a previously started sending thread is still joinable it is joined first,
// so this call blocks until that thread has returned.
// NOTE(review): brace-only lines are missing from this listing (the embedded
// original line numbers skip 319 and 325).
318 void artdaq::MetricManager::startMetricLoop_()
// Wait for any earlier sending thread before the handle is overwritten below.
320 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
321 TLOG(TLVL_INFO) <<
"Starting Metric Sending Thread" ;
// Request an explicit stack size of 4096 * 200 (= 819200) for the new thread;
// Boost documents this value as a size in bytes.
322 boost::thread::attributes attrs;
323 attrs.set_stack_size(4096 * 200);
// Thread entry point: sendMetricLoop_ bound to this MetricManager instance.
324 metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_,
this));
329 for (
auto& q : metric_queue_)
331 if (q.second.first != 0)
return false;
343 if (metric_queue_.count(name)) size = metric_queue_[name].first;
// Body of the metric-sending thread (startMetricLoop_ binds this member as the
// thread entry point). In the lines visible here it: idles while the queues are
// empty, drains queued MetricData into a local list, forwards each item to the
// plugins, drains once more, and finally calls stopMetrics() on every plugin.
// NOTE(review): this listing omits brace-only and some other lines (the embedded
// original line numbers skip), so loop / try / catch nesting is not fully
// visible - confirm scope against the full source before relying on these notes.
349 void artdaq::MetricManager::sendMetricLoop_()
// Default-constructed time_point: the first elapsed-time check below therefore
// measures from the clock's epoch rather than from thread start.
351 auto last_send_time = std::chrono::steady_clock::time_point();
// Idle phase: repeats while metricQueueEmpty() is true and running_ is set.
// NOTE(review): the condition is evaluated before lk is constructed, and lk
// guards metric_mutex_, not metric_queue_mutex_ - confirm metricQueueEmpty()
// is safe against concurrent writers to metric_queue_.
354 while (metricQueueEmpty() && running_)
356 std::unique_lock<std::mutex> lk(metric_mutex_);
// Blocks for up to 100 ms unless metric_cv_ is notified earlier.
357 metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
358 auto now = std::chrono::steady_clock::now();
// When more than metric_send_interval_ms_ milliseconds have elapsed since
// last_send_time, call sendMetrics() on every plugin and restart the interval
// (presumably both statements sit inside this if - its braces are not visible).
359 if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
361 for (
auto& metric : metric_plugins_) { metric->sendMetrics(); }
362 last_send_time = now;
// Timestamp taken before draining; passed to sendMetrics(false, processing_start) below.
366 auto processing_start = std::chrono::steady_clock::now();
367 auto temp_list = std::list<std::unique_ptr<MetricData>>();
// First drain: with metric_queue_mutex_ locked, every per-name list is spliced
// onto the end of temp_list, which leaves those lists empty.
// NOTE(review): no reset of the per-name counter (q.second.first) is visible
// here; original lines 374-377 are not shown - confirm it is zeroed.
369 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
371 for (
auto& q : metric_queue_)
373 temp_list.splice(temp_list.end(), q.second.second);
// Read the missed-call counter and reset it to 0 in a single exchange() call.
378 auto missed = missed_metric_calls_.exchange(0);
381 TLOG(TLVL_TRACE) <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" ;
// Process the drained items front to back; each unique_ptr is moved out of the
// list before its (now empty) node is popped.
384 while (temp_list.size() > 0)
386 auto data_ = std::move(temp_list.front());
387 temp_list.pop_front();
// Unless UseNameOverride is set, qualify the name with prefix_:
// "prefix_.MetricPrefix.Name" when MetricPrefix is non-empty, otherwise
// "prefix_.Name" (presumably the else branch - the else line is not visible).
389 if (!data_->UseNameOverride)
391 if (data_->MetricPrefix.size() > 0)
393 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
397 data_->Name = prefix_ +
"." + data_->Name;
// Offer the item to every plugin whose run level is >= the item's Level.
401 for (
auto& metric : metric_plugins_)
403 if (metric->getRunLevel() >= data_->Level)
407 metric->addMetricData(*data_);
// last_send_time is refreshed after addMetricData.
408 last_send_time = std::chrono::steady_clock::now();
// Tail of an error log statement (its TLOG head and the enclosing handler are
// not visible). NOTE(review): the text names MetricManager::sendMetric although
// it appears under sendMetricLoop_.
413 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
414 << metric->getLibName() ;
// After the batch, call sendMetrics(false, processing_start) on every plugin.
420 for (
auto& metric : metric_plugins_)
422 metric->sendMetrics(
false, processing_start);
// Second drain. temp_list is declared again, so this section must be in a
// different scope from the first drain (presumably after the main loop has
// exited - confirm). Unlike the first drain, metric_queue_ is cleared afterwards.
426 auto temp_list = std::list<std::unique_ptr<MetricData>>();
428 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
430 for (
auto& q : metric_queue_)
432 temp_list.splice(temp_list.end(), q.second.second);
434 metric_queue_.clear();
// Read the missed-call counter and reset it to 0 in a single exchange() call.
437 auto missed = missed_metric_calls_.exchange(0);
440 TLOG(TLVL_TRACE) <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" ;
// Same per-item handling as the first pass: move out, pop, qualify the name,
// then hand the item to each plugin whose run level is >= the item's Level.
443 while (temp_list.size() > 0)
445 auto data_ = std::move(temp_list.front());
446 temp_list.pop_front();
448 if (!data_->UseNameOverride)
450 if (data_->MetricPrefix.size() > 0)
452 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
456 data_->Name = prefix_ +
"." + data_->Name;
460 for (
auto& metric : metric_plugins_)
462 if (metric->getRunLevel() >= data_->Level)
466 metric->addMetricData(*data_);
467 last_send_time = std::chrono::steady_clock::now();
// Tail of an error log statement (TLOG head not visible); the text again names
// MetricManager::sendMetric rather than sendMetricLoop_.
472 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
473 << metric->getLibName() ;
// Finally call stopMetrics() on each plugin and log that it stopped.
479 for (
auto& metric : metric_plugins_)
483 metric->stopMetrics();
484 TLOG(TLVL_DEBUG) <<
"Metric Plugin " << metric->getLibName() <<
" stopped." ;
// Tail of an error log statement (TLOG head not visible). NOTE(review): the text
// names MetricManager::do_stop() although this code appears under sendMetricLoop_.
489 "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
490 metric->getLibName() ;
493 TLOG(TLVL_DEBUG) <<
"MetricManager has been stopped." ;
void reinitialize(fhicl::ParameterSet const &pset, std::string prefix="")
Reinitialize all MetricPlugin Instances.
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins whose threshold level is >= the given level.
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a given MetricPlugin and return a pointer to it.
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting interval.
size_t metricQueueSize(std::string name="")
Return the size of the named metric queue
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
Sends both the Accumulate mode and Rate mode metric. (Rate mode metric will append "/s" to metric units.)
Small structure used to hold a metric data point before sending to the metric plugins.
void do_resume()
Resume metric sending. Currently a No-Op.
bool metricQueueEmpty()
Returns whether the metric queue is completely empty
void do_pause()
Pause metric sending. Currently a No-Op.