9 #define TRACE_NAME "MetricManager"
11 #include "artdaq-utilities/Plugins/MetricManager.hh"
12 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
13 #include "fhiclcpp/ParameterSet.h"
16 #include <boost/exception/all.hpp>
20 , metric_send_interval_ms_(15000)
24 , missed_metric_calls_(0)
25 , metric_queue_max_size_(1000)
26 , metric_queue_notify_size_(10)
41 TLOG(TLVL_INFO) <<
"Configuring metrics with parameter set: " << pset.to_string() ;
43 std::vector<std::string> names = pset.get_pset_names();
45 for (
auto name : names)
47 if (name ==
"metric_queue_size")
49 metric_queue_max_size_ = pset.get<
size_t>(
"metric_queue_size");
51 else if (name ==
"metric_queue_notify_size")
53 metric_queue_notify_size_ = pset.get<
size_t>(
"metric_queue_notify_size");
55 else if (name ==
"metric_send_maximum_delay_ms")
57 metric_send_interval_ms_ = pset.get<
int>(
"metric_send_maximum_delay_ms");
63 TLOG(TLVL_DEBUG) <<
"Constructing metric plugin with name " << name ;
64 fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
66 plugin_pset.get<std::string>(
"metricPluginType",
""), plugin_pset, prefix_));
68 catch (
const cet::exception& e)
70 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
71 ", cet::exception object caught:" << e.explain_self() ;
73 catch (
const boost::exception& e)
75 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
76 ", boost::exception object caught: " << boost::diagnostic_information(e) ;
78 catch (
const std::exception& e)
80 TLOG(TLVL_ERROR) <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
81 ", std::exception caught: " << e.what() ;
85 TLOG(TLVL_ERROR) <<
"Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name ;
95 auto lk = std::unique_lock<std::mutex>(metric_mutex_);
98 TLOG(TLVL_DEBUG) <<
"Starting MetricManager" ;
99 for (
auto& metric : metric_plugins_)
103 metric->startMetrics();
104 TLOG(TLVL_INFO) <<
"Metric Plugin " << metric->getLibName() <<
" started." ;
110 "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
111 metric->getLibName() ;
121 auto lk = std::unique_lock<std::mutex>(metric_mutex_);
122 TLOG(TLVL_DEBUG) <<
"Stopping Metrics" ;
124 metric_cv_.notify_all();
125 TLOG(TLVL_DEBUG) <<
"Joining Metric-Sending thread" ;
127 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
128 TLOG(TLVL_DEBUG) <<
"do_stop Complete" ;
137 initialize(pset, prefix);
142 TLOG(TLVL_DEBUG) <<
"MetricManager is shutting down..." ;
145 auto lk = std::unique_lock<std::mutex>(metric_mutex_);
148 for (
auto& i : metric_plugins_)
152 std::string name = i->getLibName();
154 TLOG(TLVL_DEBUG) <<
"Metric Plugin " << name <<
" shutdown." ;
159 "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
163 initialized_ =
false;
169 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
170 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
174 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
175 if (!metric_queue_.count(name)) {
176 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
179 auto entry = &(metric_queue_[name]);
181 auto size = entry->first.load();
182 if (size < metric_queue_max_size_)
184 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
185 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
187 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
189 entry->second.emplace_back(std::move(metric));
194 TLOG(10) <<
"Rejecting metric because queue full" ;
195 missed_metric_calls_++;
197 metric_cv_.notify_all();
203 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
204 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
208 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
209 if (!metric_queue_.count(name)) {
210 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
213 auto entry = &(metric_queue_[name]);
215 auto size = entry->first.load();
216 if (size < metric_queue_max_size_)
218 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
219 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
221 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
223 entry->second.emplace_back(std::move(metric));
228 TLOG(10) <<
"Rejecting metric because queue full" ;
229 missed_metric_calls_++;
231 metric_cv_.notify_all();
237 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
238 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
242 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
243 if (!metric_queue_.count(name)) {
244 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
247 auto entry = &(metric_queue_[name]);
249 auto size = entry->first.load();
250 if (size < metric_queue_max_size_)
252 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
253 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
255 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
257 entry->second.emplace_back(std::move(metric));
262 TLOG(10) <<
"Rejecting metric because queue full" ;
263 missed_metric_calls_++;
265 metric_cv_.notify_all();
271 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
272 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
276 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
277 if (!metric_queue_.count(name)) {
278 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
281 auto entry = &(metric_queue_[name]);
283 auto size = entry->first.load();
284 if (size < metric_queue_max_size_)
286 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
287 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
289 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
291 entry->second.emplace_back(std::move(metric));
296 TLOG(10) <<
"Rejecting metric because queue full" ;
297 missed_metric_calls_++;
299 metric_cv_.notify_all();
305 if (!initialized_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager has not yet been initialized!" ; }
306 else if (!running_) { TLOG(TLVL_WARNING) <<
"Attempted to send metric when MetricManager stopped!" ; }
310 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
311 if (!metric_queue_.count(name)) {
312 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
315 auto entry = &(metric_queue_[name]);
317 auto size = entry->first.load();
318 if (size < metric_queue_max_size_)
320 if (size >= metric_queue_notify_size_) TLOG(9) <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." ;
321 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
323 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
325 entry->second.emplace_back(std::move(metric));
330 TLOG(10) <<
"Rejecting metric because queue full" ;
331 missed_metric_calls_++;
333 metric_cv_.notify_all();
// Starts the background thread that runs sendMetricLoop_().
// NOTE(review): this listing skips some original line numbers (338, 343, 345, 347, 350-351),
// so braces, the `try` keyword and part of the handler body are not visible here. The comments
// below describe only the statements that are shown.
337 void artdaq::MetricManager::startMetricLoop_()
// Join a still-joinable earlier thread before its handle is overwritten below.
339 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
340 boost::thread::attributes attrs;
// Request a stack size of 4096 * 2000 (= 8,192,000) for the new thread.
341 attrs.set_stack_size(4096 * 2000);
342 TLOG(TLVL_INFO) <<
"Starting Metric Sending Thread" ;
// Launch sendMetricLoop_ on this MetricManager instance using the attributes configured above.
344 metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_,
this));
// Handler for a boost::exception: the same diagnostic text and errno go to the TRACE log and to stderr.
346 catch (
const boost::exception& e)
348 TLOG(TLVL_ERROR) <<
"Caught boost::exception starting Metric Sending thread: " << boost::diagnostic_information(e) <<
", errno=" << errno;
349 std::cerr <<
"Caught boost::exception starting Metric Sending thread: " << boost::diagnostic_information(e) <<
", errno=" << errno << std::endl;
352 TLOG(TLVL_INFO) <<
"Metric Sending thread started";
357 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
358 for (
auto& q : metric_queue_)
360 if (q.second.first != 0)
return false;
367 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
370 for (
auto& q : metric_queue_)
372 size += q.second.first;
376 if (metric_queue_.count(name)) size = metric_queue_[name].first;
// Body of the metric-sending thread: waits for queued metric data, hands each queued item to the
// configured plugins, and finally calls stopMetrics() on every plugin.
// NOTE(review): this listing skips many original line numbers (braces, loop headers, try/catch
// keywords), so loop and scope boundaries are not visible here. The comments below describe only
// the statements that are shown; confirm structure against the full file.
382 void artdaq::MetricManager::sendMetricLoop_()
384 TLOG(TLVL_INFO) <<
"sendMetricLoop_ START";
// Default-constructed time_point, used as the reference for the first interval check below.
385 auto last_send_time = std::chrono::steady_clock::time_point();
// Idle loop: spin here while nothing is queued and the manager is still running.
388 while (metricQueueEmpty() && running_)
// Wait on metric_cv_ for at most 100 ms while holding metric_mutex_.
390 std::unique_lock<std::mutex> lk(metric_mutex_);
391 metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
392 auto now = std::chrono::steady_clock::now();
// If more than metric_send_interval_ms_ milliseconds have passed since last_send_time,
// call sendMetrics() on every plugin and restart the interval from `now`.
393 if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
395 for (
auto& metric : metric_plugins_) { metric->sendMetrics(); }
396 last_send_time = now;
// Timestamp taken before draining; it is passed to sendMetrics(false, processing_start) further down.
400 auto processing_start = std::chrono::steady_clock::now();
401 auto temp_list = std::list<std::unique_ptr<MetricData>>();
// Under metric_queue_mutex_, move the contents of every per-name list to the end of temp_list.
403 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
405 for (
auto& q : metric_queue_)
407 temp_list.splice(temp_list.end(), q.second.second);
// Read the missed-call counter and reset it to 0 in a single exchange.
412 auto missed = missed_metric_calls_.exchange(0);
415 TLOG(TLVL_TRACE) <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" ;
// Consume temp_list from the front, taking ownership of each entry before popping it.
418 while (temp_list.size() > 0)
420 auto data_ = std::move(temp_list.front());
421 temp_list.pop_front();
// Unless UseNameOverride is set, rewrite the name as prefix_.MetricPrefix.Name when MetricPrefix
// is non-empty; the other visible assignment produces prefix_.Name.
423 if (!data_->UseNameOverride)
425 if (data_->MetricPrefix.size() > 0)
427 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
431 data_->Name = prefix_ +
"." + data_->Name;
// Offer the item to each plugin whose run level is >= the item's Level.
435 for (
auto& metric : metric_plugins_)
437 if (metric->getRunLevel() >= data_->Level)
441 metric->addMetricData(*data_);
442 last_send_time = std::chrono::steady_clock::now();
// NOTE(review): the statement that emits the following message starts on a line not shown here;
// presumably it sits in an exception handler around addMetricData — confirm.
447 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
448 << metric->getLibName() ;
// After the drained items are processed, call sendMetrics(false, processing_start) on every plugin.
454 for (
auto& metric : metric_plugins_)
456 metric->sendMetrics(
false, processing_start);
// Second drain pass with the same steps as above; unlike the first pass it also clears
// metric_queue_ after splicing.
// NOTE(review): presumably this runs once after the main loop exits — the loop end is not visible here.
460 auto temp_list = std::list<std::unique_ptr<MetricData>>();
462 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
464 for (
auto& q : metric_queue_)
466 temp_list.splice(temp_list.end(), q.second.second);
468 metric_queue_.clear();
471 auto missed = missed_metric_calls_.exchange(0);
474 TLOG(TLVL_TRACE) <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" ;
477 while (temp_list.size() > 0)
479 auto data_ = std::move(temp_list.front());
480 temp_list.pop_front();
// Same name-prefixing rule as in the first pass.
482 if (!data_->UseNameOverride)
484 if (data_->MetricPrefix.size() > 0)
486 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
490 data_->Name = prefix_ +
"." + data_->Name;
// Same run-level filter as in the first pass.
494 for (
auto& metric : metric_plugins_)
496 if (metric->getRunLevel() >= data_->Level)
500 metric->addMetricData(*data_);
501 last_send_time = std::chrono::steady_clock::now();
506 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
507 << metric->getLibName() ;
// Finally call stopMetrics() on every plugin and log each one.
513 for (
auto& metric : metric_plugins_)
517 metric->stopMetrics();
518 TLOG(TLVL_DEBUG) <<
"Metric Plugin " << metric->getLibName() <<
" stopped." ;
// NOTE(review): the message below names MetricManager::do_stop() although this code is inside
// sendMetricLoop_ — confirm whether the text is intentional.
523 "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
524 metric->getLibName() ;
527 TLOG(TLVL_DEBUG) <<
"MetricManager has been stopped." ;
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string const &prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins with a threshold level >= level...
size_t metricQueueSize(std::string const &name="")
Return the size of the named metric queue
void reinitialize(fhicl::ParameterSet const &pset, std::string const &prefix="")
Reinitialize all MetricPlugin Instances.
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting int...
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps, std::string const &app_name)
Load a given MetricPlugin and return a pointer to it.
Sends both the Accumulate mode and Rate mode metric. (Rate mode metric will append "/s" to metric uni...
Small structure used to hold a metric data point before sending to the metric plugins ...
void do_resume()
Resume metric sending. Currently a No-Op.
bool metricQueueEmpty()
Returns whether the metric queue is completely empty
void do_pause()
Pause metric sending. Currently a No-Op.