9 #define TRACE_NAME "MetricManager"
11 #include "artdaq-utilities/Plugins/MetricManager.hh"
12 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
13 #include "fhiclcpp/ParameterSet.h"
16 #include <boost/exception/all.hpp>
// Fragment of a member-initializer list; the enclosing constructor header and the
// other initializers are not visible in this listing.
// Defaults: send interval 15000 (the `_ms_` suffix suggests milliseconds), missed-call
// counter 0, per-metric queue limit 1000, queue-size notification threshold 10.
20 , metric_send_interval_ms_(15000)
24 , missed_metric_calls_(0)
25 , metric_queue_max_size_(1000)
26 , metric_queue_notify_size_(10)
// Body of the plugin-configuration routine (the log messages below identify it as
// MetricManager::initialize; the signature and braces are not visible in this listing).
// Logs the incoming parameter set, discards the current plugin list, then walks every
// name returned by pset.get_pset_names(): three reserved names update queue/timing
// settings, any other name is treated as a plugin configuration.
41 TLOG(TLVL_INFO) <<
"Configuring metrics with parameter set: " << pset.to_string() ;
43 std::vector<std::string> names = pset.get_pset_names();
// Drop previously constructed plugins before rebuilding the list.
45 metric_plugins_.clear();
// NOTE(review): `auto name` copies each string per iteration; `const auto&` would avoid it.
47 for (
auto name : names)
// NOTE(review): `names` comes from get_pset_names(); confirm that scalar keys such as
// "metric_queue_size" are actually reported there, otherwise these three branches never run.
49 if (name ==
"metric_queue_size")
51 metric_queue_max_size_ = pset.get<
size_t>(
"metric_queue_size");
53 else if (name ==
"metric_queue_notify_size")
55 metric_queue_notify_size_ = pset.get<
size_t>(
"metric_queue_notify_size");
57 else if (name ==
"metric_send_maximum_delay_ms")
59 metric_send_interval_ms_ = pset.get<
int>(
"metric_send_maximum_delay_ms");
// Any other name: read the nested parameter set and build a plugin from it.
65 TLOG(TLVL_DEBUG) <<
"Constructing metric plugin with name " << name ;
66 fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
// Tail of a call whose opening is not visible (presumably makeMetricPlugin(...) stored into
// metric_plugins_ -- TODO confirm). "metricPluginType" defaults to the empty string.
68 plugin_pset.get<std::string>(
"metricPluginType",
""), plugin_pset, prefix_));
// Plugin-construction failures are logged at error level, one handler per exception family.
// No rethrow is visible in this listing.
70 catch (
const cet::exception& e)
72 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
73 ", cet::exception object caught:" << e.explain_self() ;
75 catch (
const boost::exception& e)
77 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
78 ", boost::exception object caught: " << boost::diagnostic_information(e) ;
80 catch (
const std::exception& e)
82 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
83 ", std::exception caught: " << e.what() ;
// Presumably the catch(...) fallback; the handler line itself is not visible.
87 TLOG(TLVL_ERROR) <<
"Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name ;
// Body of the start routine (the error message below names it MetricManager::do_start();
// the signature is not visible). Takes metric_mutex_, then calls startMetrics() on every
// non-null plugin and logs each one that started.
97 auto lk = std::unique_lock<std::mutex>(metric_mutex_);
100 TLOG(TLVL_DEBUG) <<
"Starting MetricManager" ;
101 for (
auto& metric : metric_plugins_)
// Null plugin slots are skipped.
103 if (!metric)
continue;
106 metric->startMetrics();
107 TLOG(TLVL_INFO) <<
"Metric Plugin " << metric->getLibName() <<
" started." ;
// Tail of the failure log statement; its opening TLOG and the surrounding try/catch
// are not visible in this listing.
113 "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
114 metric->getLibName() ;
// Body of the stop routine (the final log message names it do_stop; the signature is not
// visible). Takes metric_mutex_, wakes every waiter on metric_cv_, and joins the
// metric-sending thread if it is joinable.
124 auto lk = std::unique_lock<std::mutex>(metric_mutex_);
125 TLOG(TLVL_DEBUG) <<
"Stopping Metrics" ;
// NOTE(review): a statement between these lines is not visible (presumably running_ is
// cleared here so the sending loop can exit -- TODO confirm).
127 metric_cv_.notify_all();
128 TLOG(TLVL_DEBUG) <<
"Joining Metric-Sending thread" ;
// NOTE(review): the sending loop also locks metric_mutex_; confirm `lk` is released before
// this join (the line in between is not visible), otherwise the join could deadlock.
130 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
131 TLOG(TLVL_DEBUG) <<
"do_stop Complete" ;
// Only visible statement of what appears to be reinitialize(pset, prefix): it re-runs
// initialize() with the same arguments. Any teardown performed before this call is not
// visible in this listing.
140 initialize(pset, prefix);
// Body of the shutdown routine (the error message below names it MetricManager::shutdown();
// the signature is not visible). Takes metric_mutex_, walks the plugin list logging each
// plugin's library name, then empties the list and clears initialized_.
145 TLOG(TLVL_DEBUG) <<
"MetricManager is shutting down..." ;
148 auto lk = std::unique_lock<std::mutex>(metric_mutex_);
151 for (
auto& i : metric_plugins_)
// The name is captured into a local before the "shutdown" log line; the statement that
// actually releases the plugin is not visible here (presumably i.reset() -- TODO confirm).
155 std::string name = i->getLibName();
157 TLOG(TLVL_DEBUG) <<
"Metric Plugin " << name <<
" shutdown." ;
// Head of the failure log statement; its opening TLOG and its tail are not visible.
162 "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
166 metric_plugins_.clear();
167 initialized_ =
false;
// Body of one sendMetric overload (the signature is not visible; the overload bodies in this
// listing are identical in their visible text). Queues one metric data point for the
// sending thread instead of calling the plugins directly.
// A warning is logged when the manager is not initialized or not running.
173 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
174 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
// Under metric_queue_mutex_: on first use of this metric name, create an empty
// (count, list) entry in metric_queue_.
178 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
179 if (!metric_queue_.count(name)) {
180 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
// NOTE(review): whether the lock above is still held here is not visible in this listing.
183 auto entry = &(metric_queue_[name]);
// The count is read with .load(), so it is apparently an atomic counter.
185 auto size = entry->first.load();
186 if (size < metric_queue_max_size_)
// Trace message (level 9) once the queue reaches the notify threshold.
188 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
189 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// The append to the per-name list happens under metric_queue_mutex_.
191 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
193 entry->second.emplace_back(std::move(metric));
// Queue full: the data point is dropped and counted in missed_metric_calls_.
198 TLOG(10) <<
"Rejecting metric because queue full" ;
199 missed_metric_calls_++;
// Wake the sending loop, which waits on metric_cv_.
201 metric_cv_.notify_all();
// Body of one sendMetric overload (the signature is not visible; the overload bodies in this
// listing are identical in their visible text). Queues one metric data point for the
// sending thread instead of calling the plugins directly.
// A warning is logged when the manager is not initialized or not running.
207 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
208 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
// Under metric_queue_mutex_: on first use of this metric name, create an empty
// (count, list) entry in metric_queue_.
212 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
213 if (!metric_queue_.count(name)) {
214 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
// NOTE(review): whether the lock above is still held here is not visible in this listing.
217 auto entry = &(metric_queue_[name]);
// The count is read with .load(), so it is apparently an atomic counter.
219 auto size = entry->first.load();
220 if (size < metric_queue_max_size_)
// Trace message (level 9) once the queue reaches the notify threshold.
222 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
223 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// The append to the per-name list happens under metric_queue_mutex_.
225 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
227 entry->second.emplace_back(std::move(metric));
// Queue full: the data point is dropped and counted in missed_metric_calls_.
232 TLOG(10) <<
"Rejecting metric because queue full" ;
233 missed_metric_calls_++;
// Wake the sending loop, which waits on metric_cv_.
235 metric_cv_.notify_all();
// Body of one sendMetric overload (the signature is not visible; the overload bodies in this
// listing are identical in their visible text). Queues one metric data point for the
// sending thread instead of calling the plugins directly.
// A warning is logged when the manager is not initialized or not running.
241 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
242 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
// Under metric_queue_mutex_: on first use of this metric name, create an empty
// (count, list) entry in metric_queue_.
246 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
247 if (!metric_queue_.count(name)) {
248 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
// NOTE(review): whether the lock above is still held here is not visible in this listing.
251 auto entry = &(metric_queue_[name]);
// The count is read with .load(), so it is apparently an atomic counter.
253 auto size = entry->first.load();
254 if (size < metric_queue_max_size_)
// Trace message (level 9) once the queue reaches the notify threshold.
256 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
257 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// The append to the per-name list happens under metric_queue_mutex_.
259 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
261 entry->second.emplace_back(std::move(metric));
// Queue full: the data point is dropped and counted in missed_metric_calls_.
266 TLOG(10) <<
"Rejecting metric because queue full" ;
267 missed_metric_calls_++;
// Wake the sending loop, which waits on metric_cv_.
269 metric_cv_.notify_all();
// Body of one sendMetric overload (the signature is not visible; the overload bodies in this
// listing are identical in their visible text). Queues one metric data point for the
// sending thread instead of calling the plugins directly.
// A warning is logged when the manager is not initialized or not running.
275 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
276 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
// Under metric_queue_mutex_: on first use of this metric name, create an empty
// (count, list) entry in metric_queue_.
280 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
281 if (!metric_queue_.count(name)) {
282 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
// NOTE(review): whether the lock above is still held here is not visible in this listing.
285 auto entry = &(metric_queue_[name]);
// The count is read with .load(), so it is apparently an atomic counter.
287 auto size = entry->first.load();
288 if (size < metric_queue_max_size_)
// Trace message (level 9) once the queue reaches the notify threshold.
290 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
291 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// The append to the per-name list happens under metric_queue_mutex_.
293 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
295 entry->second.emplace_back(std::move(metric));
// Queue full: the data point is dropped and counted in missed_metric_calls_.
300 TLOG(10) <<
"Rejecting metric because queue full" ;
301 missed_metric_calls_++;
// Wake the sending loop, which waits on metric_cv_.
303 metric_cv_.notify_all();
// Body of one sendMetric overload (the signature is not visible; the overload bodies in this
// listing are identical in their visible text). Queues one metric data point for the
// sending thread instead of calling the plugins directly.
// A warning is logged when the manager is not initialized or not running.
309 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
310 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
// Under metric_queue_mutex_: on first use of this metric name, create an empty
// (count, list) entry in metric_queue_.
314 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
315 if (!metric_queue_.count(name)) {
316 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
// NOTE(review): whether the lock above is still held here is not visible in this listing.
319 auto entry = &(metric_queue_[name]);
// The count is read with .load(), so it is apparently an atomic counter.
321 auto size = entry->first.load();
322 if (size < metric_queue_max_size_)
// Trace message (level 9) once the queue reaches the notify threshold.
324 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
325 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// The append to the per-name list happens under metric_queue_mutex_.
327 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
329 entry->second.emplace_back(std::move(metric));
// Queue full: the data point is dropped and counted in missed_metric_calls_.
334 TLOG(10) <<
"Rejecting metric because queue full" ;
335 missed_metric_calls_++;
// Wake the sending loop, which waits on metric_cv_.
337 metric_cv_.notify_all();
// Starts (or restarts) the metric-sending thread running sendMetricLoop_().
// Braces and the `try` keyword are not visible in this listing.
341 void artdaq::MetricManager::startMetricLoop_()
// Join any previous sending thread before creating a new one.
343 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
344 boost::thread::attributes attrs;
// Requests a thread stack of 4096 * 2000 bytes.
345 attrs.set_stack_size(4096 * 2000);
346 TLOG(TLVL_INFO) <<
"Starting Metric Sending Thread" ;
348 metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_,
this));
// A boost::exception raised during thread creation is reported both through TLOG and on
// std::cerr, with errno appended. What follows the two reports is not visible.
350 catch (
const boost::exception& e)
352 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Metric Sending thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
353 std::cerr <<
"Caught boost::exception starting Metric Sending thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
356 TLOG(TLVL_INFO) <<
"Metric Sending thread started";
// Body of the queue-empty check (appears to be metricQueueEmpty(); the signature is not
// visible). Under metric_queue_mutex_, returns false as soon as any per-name entry has a
// non-zero count; the fall-through return is not visible (presumably `return true`).
361 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
362 for (
auto& q : metric_queue_)
364 if (q.second.first != 0)
return false;
// Body of the queue-size query (appears to be metricQueueSize(name); the signature, the
// declaration of `size`, the branch selecting between the two cases and the return are not
// visible). Under metric_queue_mutex_ it either sums the counts of all per-name queues
// or reads the count of the single queue called `name` when that queue exists.
371 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
374 for (
auto& q : metric_queue_)
376 size += q.second.first;
// NOTE(review): count() followed by operator[] looks the key up twice.
380 if (metric_queue_.count(name)) size = metric_queue_[name].first;
// Thread body for the metric-sending thread (it is bound to the thread in startMetricLoop_).
// Visible structure: wait while the queue is empty, drain all queued data points into the
// plugins, ask the plugins to send; then a second drain pass and a stopMetrics() pass.
// The outer loop header, braces and try/catch keywords are not visible in this listing.
386 void artdaq::MetricManager::sendMetricLoop_()
388 TLOG(TLVL_INFO) <<
"sendMetricLoop_ START";
// Default-constructed time_point, i.e. the clock's epoch.
389 auto last_send_time = std::chrono::steady_clock::time_point();
// Idle wait: while nothing is queued and the manager is running, sleep on metric_cv_ for at
// most 100 ms at a time, holding metric_mutex_.
392 while (metricQueueEmpty() && running_)
394 std::unique_lock<std::mutex> lk(metric_mutex_);
395 metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
396 auto now = std::chrono::steady_clock::now();
// If more than metric_send_interval_ms_ has elapsed since the last send, have every
// non-null plugin send anyway.
397 if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
399 for (
auto& metric : metric_plugins_) {
if(metric) metric->sendMetrics(); }
400 last_send_time = now;
404 auto processing_start = std::chrono::steady_clock::now();
405 auto temp_list = std::list<std::unique_ptr<MetricData>>();
// Under metric_queue_mutex_, move every per-name list into temp_list so the plugins are
// fed from the local list. Any reset of the per-name counters is not visible here.
407 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
409 for (
auto& q : metric_queue_)
411 temp_list.splice(temp_list.end(), q.second.second);
// Read and reset the count of rejected sendMetric calls in one step.
416 auto missed = missed_metric_calls_.exchange(0);
419 TLOG(TLVL_TRACE) <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" ;
422 while (temp_list.size() > 0)
424 auto data_ = std::move(temp_list.front());
425 temp_list.pop_front();
// Unless the caller asked for a name override, qualify the metric name with prefix_ and,
// when non-empty, the per-metric prefix.
427 if (!data_->UseNameOverride)
429 if (data_->MetricPrefix.size() > 0)
431 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
435 data_->Name = prefix_ +
"." + data_->Name;
// Hand the data point to every non-null plugin whose run level is >= the metric's level.
439 for (
auto& metric : metric_plugins_)
441 if (!metric)
continue;
442 if (metric->getRunLevel() >= data_->Level)
446 metric->addMetricData(*data_);
447 last_send_time = std::chrono::steady_clock::now();
// Tail of the failure log statement; its opening TLOG and the try/catch are not visible.
452 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
453 << metric->getLibName() ;
// After the batch, ask every non-null plugin to send, passing the batch start time.
459 for (
auto& metric : metric_plugins_)
461 if (!metric)
continue;
462 metric->sendMetrics(
false, processing_start);
// Second drain pass (presumably the final flush after the main loop exits -- TODO confirm).
// Same steps as the drain above, but it also clears metric_queue_ entirely.
466 auto temp_list = std::list<std::unique_ptr<MetricData>>();
468 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
470 for (
auto& q : metric_queue_)
472 temp_list.splice(temp_list.end(), q.second.second);
474 metric_queue_.clear();
477 auto missed = missed_metric_calls_.exchange(0);
480 TLOG(TLVL_TRACE) <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" ;
483 while (temp_list.size() > 0)
485 auto data_ = std::move(temp_list.front());
486 temp_list.pop_front();
488 if (!data_->UseNameOverride)
490 if (data_->MetricPrefix.size() > 0)
492 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
496 data_->Name = prefix_ +
"." + data_->Name;
500 for (
auto& metric : metric_plugins_)
502 if (!metric)
continue;
503 if (metric->getRunLevel() >= data_->Level)
507 metric->addMetricData(*data_);
508 last_send_time = std::chrono::steady_clock::now();
513 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
514 << metric->getLibName() ;
// Finally stop every non-null plugin.
// NOTE(review): the failure message below names do_stop() although this code appears to
// live in sendMetricLoop_ -- confirm against the full source.
520 for (
auto& metric : metric_plugins_)
522 if (!metric)
continue;
525 metric->stopMetrics();
526 TLOG(TLVL_DEBUG) <<
"Metric Plugin " << metric->getLibName() <<
" stopped." ;
531 "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
532 metric->getLibName() ;
535 TLOG(TLVL_DEBUG) <<
"MetricManager has been stopped." ;
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string const &prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins with a threshold level greater than or equal to level.
size_t metricQueueSize(std::string const &name="")
Return the size of the named metric queue
void reinitialize(fhicl::ParameterSet const &pset, std::string const &prefix="")
Reinitialize all MetricPlugin Instances.
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting interval.
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps, std::string const &app_name)
Load a given MetricPlugin and return a pointer to it.
Sends both the Accumulate mode and Rate mode metric. (Rate mode metric will append "/s" to metric units.)
Small structure used to hold a metric data point before sending to the metric plugins ...
void do_resume()
Resume metric sending. Currently a No-Op.
bool metricQueueEmpty()
Returns whether the metric queue is completely empty
void do_pause()
Pause metric sending. Currently a No-Op.