9 #define TRACE_NAME "MetricManager"
11 #include "artdaq-utilities/Plugins/MetricManager.hh"
12 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
13 #include "fhiclcpp/ParameterSet.h"
16 #include <boost/exception/all.hpp>
// Fragment of a constructor member-initializer list; the constructor header and any
// earlier initializers are outside the visible lines.
// Counter of rejected metric submissions starts at zero.
23 , missed_metric_calls_(0)
// Default per-name queue capacity: sendMetric only queues while a queue's count is below this.
24 , metric_queue_max_size_(1000)
// Default queue depth at or above which sendMetric emits a trace message.
25 , metric_queue_notify_size_(10)
// Body fragment of MetricManager::initialize (name taken from the log messages below).
// The signature, braces, the `try` statement and several other lines are not visible
// in this listing, so the comments describe only what can be seen.
// Record the complete configuration that is being applied.
40 TLOG_INFO(
"MetricManager") <<
"Configuring metrics with parameter set:\n" << pset.to_string() << TLOG_ENDL;
// Names to walk over, as reported by the parameter set.
// NOTE(review): if get_pset_names() lists only nested tables, scalar keys such as
// "metric_queue_size" would never appear in `names` and the two override branches
// below would be unreachable -- confirm against the fhicl-cpp documentation.
42 std::vector<std::string> names = pset.get_pset_names();
// NOTE(review): `auto name` copies each string on every iteration.
44 for (
auto name : names)
// Optional override of the per-name queue capacity.
46 if (name ==
"metric_queue_size")
48 metric_queue_max_size_ = pset.get<
size_t>(
"metric_queue_size");
// Optional override of the depth at which queue growth is traced.
50 else if (name ==
"metric_queue_notify_size")
52 metric_queue_notify_size_ = pset.get<
size_t>(
"metric_queue_notify_size");
// Presumably every other name is treated as a plugin configuration (the `else` is not visible).
58 TLOG_DEBUG(
"MetricManager") <<
"Constructing metric plugin with name " << name << TLOG_ENDL;
// The plugin's own settings live in the nested parameter set stored under its name.
59 fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
// Tail of a call whose beginning is not visible (presumably the plugin factory):
// the plugin type is read from "metricPluginType" and defaults to an empty string.
61 plugin_pset.get<std::string>(
"metricPluginType",
""), plugin_pset));
// Plugin-load failures are reported per exception family, most specific first.
// As far as visible, each handler only logs the failure.
63 catch (
const cet::exception& e)
65 TLOG_ERROR(
"MetricManager") <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
66 ", cet::exception object caught:" << e.explain_self() << TLOG_ENDL;
68 catch (
const boost::exception& e)
70 TLOG_ERROR(
"MetricManager") <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
71 ", boost::exception object caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
73 catch (
const std::exception& e)
75 TLOG_ERROR(
"MetricManager") <<
"Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
76 ", std::exception caught: " << e.what() << TLOG_ENDL;
// Message for anything that is none of the above (its catch clause is not visible).
80 TLOG_ERROR(
"MetricManager") <<
"Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name << TLOG_ENDL;
// Body fragment of MetricManager::do_start() (name taken from the error message below).
// The signature, braces and try/catch keywords are not visible in this listing.
92 TLOG_DEBUG(
"MetricManager") <<
"Starting MetricManager" << TLOG_ENDL;
// Start each configured plugin in turn and report it by its library name.
93 for (
auto& metric : metric_plugins_)
97 metric->startMetrics();
98 TLOG_INFO(
"MetricManager") <<
"Metric Plugin " << metric->getLibName() <<
" started." << TLOG_ENDL;
// Error report for a plugin that failed to start (presumably inside an exception
// handler whose catch clause is not visible).
103 TLOG_ERROR(
"MetricManager") <<
104 "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
105 metric->getLibName() << TLOG_ENDL;
// Body fragment of MetricManager::do_stop() (name taken from the "do_stop Complete"
// message below). The signature and some lines are not visible in this listing.
115 TLOG_DEBUG(
"MetricManager") <<
"Stopping Metrics" << TLOG_ENDL;
// Wake anything blocked on metric_cv_ (the sending loop waits on it) so it can re-check its conditions.
117 metric_cv_.notify_all();
118 TLOG_DEBUG(
"MetricManager") <<
"Joining Metric-Sending thread" << TLOG_ENDL;
// Block until the sending thread has finished, if one is attached.
119 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
120 TLOG_DEBUG(
"MetricManager") <<
"do_stop Complete" << TLOG_ENDL;
// Fragment, presumably of MetricManager::reinitialize (the signature and any preceding
// statements are not visible): runs initialize() with the supplied parameter set and prefix.
129 initialize(pset, prefix);
// Body fragment of MetricManager::shutdown() (name taken from the error message below).
// The signature, braces, try/catch keywords and at least one statement are not visible.
134 TLOG_DEBUG(
"MetricManager") <<
"MetricManager is shutting down..." << TLOG_ENDL;
// Visit every configured plugin.
139 for (
auto& i : metric_plugins_)
// Copy the library name first; presumably the plugin is released on a line not shown
// here, after which the name is still needed for the log message -- TODO confirm.
143 std::string name = i->getLibName();
145 TLOG_DEBUG(
"MetricManager") <<
"Metric Plugin " << name <<
" shutdown." << TLOG_ENDL;
// Error report for a plugin that failed to shut down (presumably inside an exception
// handler whose catch clause is not visible).
149 TLOG_ERROR(
"MetricManager") <<
150 "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
151 i->getLibName() << TLOG_ENDL;
// From here on sendMetric reports the manager as not initialized.
154 initialized_ =
false;
// Body fragment of a sendMetric overload. The signature, braces and `else` keywords are
// not visible in this listing, so the value type this overload accepts cannot be told.
// Warn when the manager is not initialized or not running; presumably the rest of the
// body runs only when both checks pass (the final `else` is not visible).
160 if (!initialized_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
161 else if (!running_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
// Create the per-name queue entry on first use: a (count, list of pending metrics) pair.
// NOTE(review): this lookup/insert and the read of entry->first below come before the
// metric_queue_mutex_ lock taken further down -- looks unsynchronized against the
// sending thread, which splices from and clears this map; confirm.
164 if (!metric_queue_.count(name)) {
165 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
167 auto entry = &(metric_queue_[name]);
// The pair's first member is used as the current depth of this queue.
169 auto size = entry->first;
// Accept the metric only while the depth is below the configured capacity.
170 if (size < metric_queue_max_size_)
// Emit a trace message whenever the depth has reached the notify threshold.
172 if (size >= metric_queue_notify_size_) TLOG_ARB(9,
"MetricManager") <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." << TLOG_ENDL;
// Build the data point before taking the lock.
173 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// Append to the pending list while holding the queue mutex.
175 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
177 entry->second.emplace_back(std::move(metric));
// Queue-full path (the `else` is not visible): the metric is dropped and counted as missed.
182 TLOG_ARB(10,
"MetricManager") <<
"Rejecting metric because queue full" << TLOG_ENDL;
183 missed_metric_calls_++;
// Wake anything waiting on metric_cv_ (the sending loop waits on it).
185 metric_cv_.notify_all();
// Body fragment of a sendMetric overload. The signature, braces and `else` keywords are
// not visible in this listing, so the value type this overload accepts cannot be told.
// Warn when the manager is not initialized or not running; presumably the rest of the
// body runs only when both checks pass (the final `else` is not visible).
191 if (!initialized_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
192 else if (!running_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
// Create the per-name queue entry on first use: a (count, list of pending metrics) pair.
// NOTE(review): this lookup/insert and the read of entry->first below come before the
// metric_queue_mutex_ lock taken further down -- looks unsynchronized against the
// sending thread, which splices from and clears this map; confirm.
195 if (!metric_queue_.count(name)) {
196 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
198 auto entry = &(metric_queue_[name]);
// The pair's first member is used as the current depth of this queue.
200 auto size = entry->first;
// Accept the metric only while the depth is below the configured capacity.
201 if (size < metric_queue_max_size_)
// Emit a trace message whenever the depth has reached the notify threshold.
203 if (size >= metric_queue_notify_size_) TLOG_ARB(9,
"MetricManager") <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." << TLOG_ENDL;
// Build the data point before taking the lock.
204 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// Append to the pending list while holding the queue mutex.
206 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
208 entry->second.emplace_back(std::move(metric));
// Queue-full path (the `else` is not visible): the metric is dropped and counted as missed.
213 TLOG_ARB(10,
"MetricManager") <<
"Rejecting metric because queue full" << TLOG_ENDL;
214 missed_metric_calls_++;
// Wake anything waiting on metric_cv_ (the sending loop waits on it).
216 metric_cv_.notify_all();
// Body fragment of a sendMetric overload. The signature, braces and `else` keywords are
// not visible in this listing, so the value type this overload accepts cannot be told.
// Warn when the manager is not initialized or not running; presumably the rest of the
// body runs only when both checks pass (the final `else` is not visible).
222 if (!initialized_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
223 else if (!running_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
// Create the per-name queue entry on first use: a (count, list of pending metrics) pair.
// NOTE(review): this lookup/insert and the read of entry->first below come before the
// metric_queue_mutex_ lock taken further down -- looks unsynchronized against the
// sending thread, which splices from and clears this map; confirm.
226 if (!metric_queue_.count(name)) {
227 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
229 auto entry = &(metric_queue_[name]);
// The pair's first member is used as the current depth of this queue.
231 auto size = entry->first;
// Accept the metric only while the depth is below the configured capacity.
232 if (size < metric_queue_max_size_)
// Emit a trace message whenever the depth has reached the notify threshold.
234 if (size >= metric_queue_notify_size_) TLOG_ARB(9,
"MetricManager") <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." << TLOG_ENDL;
// Build the data point before taking the lock.
235 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// Append to the pending list while holding the queue mutex.
237 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
239 entry->second.emplace_back(std::move(metric));
// Queue-full path (the `else` is not visible): the metric is dropped and counted as missed.
244 TLOG_ARB(10,
"MetricManager") <<
"Rejecting metric because queue full" << TLOG_ENDL;
245 missed_metric_calls_++;
// Wake anything waiting on metric_cv_ (the sending loop waits on it).
247 metric_cv_.notify_all();
// Body fragment of a sendMetric overload. The signature, braces and `else` keywords are
// not visible in this listing, so the value type this overload accepts cannot be told.
// Warn when the manager is not initialized or not running; presumably the rest of the
// body runs only when both checks pass (the final `else` is not visible).
253 if (!initialized_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
254 else if (!running_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
// Create the per-name queue entry on first use: a (count, list of pending metrics) pair.
// NOTE(review): this lookup/insert and the read of entry->first below come before the
// metric_queue_mutex_ lock taken further down -- looks unsynchronized against the
// sending thread, which splices from and clears this map; confirm.
257 if (!metric_queue_.count(name)) {
258 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
260 auto entry = &(metric_queue_[name]);
// The pair's first member is used as the current depth of this queue.
262 auto size = entry->first;
// Accept the metric only while the depth is below the configured capacity.
263 if (size < metric_queue_max_size_)
// Emit a trace message whenever the depth has reached the notify threshold.
265 if (size >= metric_queue_notify_size_) TLOG_ARB(9,
"MetricManager") <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." << TLOG_ENDL;
// Build the data point before taking the lock.
266 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// Append to the pending list while holding the queue mutex.
268 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
270 entry->second.emplace_back(std::move(metric));
// Queue-full path (the `else` is not visible): the metric is dropped and counted as missed.
275 TLOG_ARB(10,
"MetricManager") <<
"Rejecting metric because queue full" << TLOG_ENDL;
276 missed_metric_calls_++;
// Wake anything waiting on metric_cv_ (the sending loop waits on it).
278 metric_cv_.notify_all();
// Body fragment of a sendMetric overload. The signature, braces and `else` keywords are
// not visible in this listing, so the value type this overload accepts cannot be told.
// Warn when the manager is not initialized or not running; presumably the rest of the
// body runs only when both checks pass (the final `else` is not visible).
284 if (!initialized_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
285 else if (!running_) { TLOG_WARNING(
"MetricManager") <<
"Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
// Create the per-name queue entry on first use: a (count, list of pending metrics) pair.
// NOTE(review): this lookup/insert and the read of entry->first below come before the
// metric_queue_mutex_ lock taken further down -- looks unsynchronized against the
// sending thread, which splices from and clears this map; confirm.
288 if (!metric_queue_.count(name)) {
289 metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
291 auto entry = &(metric_queue_[name]);
// The pair's first member is used as the current depth of this queue.
293 auto size = entry->first;
// Accept the metric only while the depth is below the configured capacity.
294 if (size < metric_queue_max_size_)
// Emit a trace message whenever the depth has reached the notify threshold.
296 if (size >= metric_queue_notify_size_) TLOG_ARB(9,
"MetricManager") <<
"Metric queue is at size " << size <<
" of " << metric_queue_max_size_ <<
"." << TLOG_ENDL;
// Build the data point before taking the lock.
297 std::unique_ptr<MetricData> metric(
new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
// Append to the pending list while holding the queue mutex.
299 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
301 entry->second.emplace_back(std::move(metric));
// Queue-full path (the `else` is not visible): the metric is dropped and counted as missed.
306 TLOG_ARB(10,
"MetricManager") <<
"Rejecting metric because queue full" << TLOG_ENDL;
307 missed_metric_calls_++;
// Wake anything waiting on metric_cv_ (the sending loop waits on it).
309 metric_cv_.notify_all();
313 void artdaq::MetricManager::startMetricLoop_()
315 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
316 TLOG_INFO(
"MetricManager") <<
"Starting Metric Sending Thread" << TLOG_ENDL;
317 boost::thread::attributes attrs;
318 attrs.set_stack_size(4096 * 200);
319 metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_,
this));
// Body fragment, presumably of metricQueueEmpty() (the signature and the final return are
// not visible): reports false as soon as any per-name queue has a non-zero count.
324 for (
auto& q : metric_queue_)
326 if (q.second.first != 0)
return false;
// Fragment, presumably of metricQueueSize() (the signature and surrounding statements are
// not visible): when an entry exists for `name`, its stored count is taken as the size.
338 if (metric_queue_.count(name)) size = metric_queue_[name].first;
// Worker loop run on the metric-sending thread (startMetricLoop_ hands this function to
// boost::thread). This listing omits several lines (braces, try/catch keywords, possibly
// an enclosing loop and other statements), so the comments describe only what is visible.
344 void artdaq::MetricManager::sendMetricLoop_()
// Default-constructed time_point, so the first interval check below sees a very large elapsed time.
346 auto last_send_time = std::chrono::steady_clock::time_point();
// Idle phase: while nothing is queued and the manager is running, wait on the condition
// variable for at most 100 ms at a time.
// NOTE(review): metricQueueEmpty() is evaluated before any lock is taken here -- confirm
// whether it synchronizes internally.
349 while (metricQueueEmpty() && running_)
// This wait uses metric_mutex_, a different mutex from the one guarding the queues.
351 std::unique_lock<std::mutex> lk(metric_mutex_);
352 metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
353 auto now = std::chrono::steady_clock::now();
// Even without new data, ask every plugin to send once more than metric_send_interval_ms_
// milliseconds have passed since the last recorded send.
354 if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
356 for (
auto& metric : metric_plugins_) { metric->sendMetrics(); }
357 last_send_time = now;
// Drain phase: move every pending metric out of the per-name queues into a local list
// while holding the queue mutex.
361 auto temp_list = std::list<std::unique_ptr<MetricData>>();
363 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
365 for (
auto& q : metric_queue_)
367 temp_list.splice(temp_list.end(), q.second.second);
// Synthetic bookkeeping metric: size() is evaluated before the new element is appended,
// so it reports the number of metrics drained above.
371 temp_list.emplace_back(
new MetricData(
"Metric Calls", temp_list.size(),
"metrics", 4,
MetricMode::Accumulate,
"",
false));
// exchange(0) reads the rejected-submission counter and resets it in one step.
372 auto missed = missed_metric_calls_.exchange(0);
374 temp_list.emplace_back(
new MetricData(
"Missed Metric Calls", missed,
"metrics", 4,
MetricMode::Accumulate,
"",
false));
// The count logged here includes the two synthetic entries appended above.
375 TLOG_TRACE(
"MetricManager") <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" << TLOG_ENDL;
// Dispatch phase: take metrics off the front of the local list one at a time.
378 while (temp_list.size() > 0)
380 auto data_ = std::move(temp_list.front());
381 temp_list.pop_front();
// Unless the name override flag is set, qualify the name with prefix_ and, when it is
// non-empty, the metric's own prefix (the `else` between the two forms is not visible).
383 if (!data_->UseNameOverride)
385 if (data_->MetricPrefix.size() > 0)
387 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
391 data_->Name = prefix_ +
"." + data_->Name;
// Hand the metric to every plugin whose run level is at least the metric's level.
395 for (
auto& metric : metric_plugins_)
397 if (metric->getRunLevel() >= data_->Level)
401 metric->addMetricData(*data_);
// Each delivery refreshes the timestamp used by the idle-phase interval check.
402 last_send_time = std::chrono::steady_clock::now();
// Error report for a plugin that failed to accept the value (presumably inside an
// exception handler whose catch clause is not visible).
406 TLOG_ERROR(
"MetricManager") <<
407 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
408 << metric->getLibName() << TLOG_ENDL;
// After the batch has been dispatched, have every plugin send.
414 for (
auto& metric : metric_plugins_)
416 metric->sendMetrics();
// Second drain/dispatch pass. It repeats the statements above and additionally clears the
// queue map -- presumably the final flush after the main loop ends (the loop's closing
// brace is not visible; TODO confirm).
420 auto temp_list = std::list<std::unique_ptr<MetricData>>();
422 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
424 for (
auto& q : metric_queue_)
426 temp_list.splice(temp_list.end(), q.second.second);
// Drop every per-name queue entry now that their contents have been moved out.
428 metric_queue_.clear();
// Same two synthetic bookkeeping metrics as in the first pass.
430 temp_list.emplace_back(
new MetricData(
"Metric Calls", temp_list.size(),
"metrics", 4,
MetricMode::Accumulate,
"",
false));
431 auto missed = missed_metric_calls_.exchange(0);
433 temp_list.emplace_back(
new MetricData(
"Missed Metric Calls", missed,
"metrics", 4,
MetricMode::Accumulate,
"",
false));
434 TLOG_TRACE(
"MetricManager") <<
"There are " << temp_list.size() <<
" Metric Calls to process (missed " << missed <<
")" << TLOG_ENDL;
// Dispatch the flushed metrics exactly as in the first pass.
437 while (temp_list.size() > 0)
439 auto data_ = std::move(temp_list.front());
440 temp_list.pop_front();
442 if (!data_->UseNameOverride)
444 if (data_->MetricPrefix.size() > 0)
446 data_->Name = prefix_ +
"." + data_->MetricPrefix +
"." + data_->Name;
450 data_->Name = prefix_ +
"." + data_->Name;
454 for (
auto& metric : metric_plugins_)
456 if (metric->getRunLevel() >= data_->Level)
460 metric->addMetricData(*data_);
461 last_send_time = std::chrono::steady_clock::now();
465 TLOG_ERROR(
"MetricManager") <<
466 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
467 << metric->getLibName() << TLOG_ENDL;
// Finally stop every plugin and report each one by its library name.
473 for (
auto& metric : metric_plugins_)
477 metric->stopMetrics();
478 TLOG_DEBUG(
"MetricManager") <<
"Metric Plugin " << metric->getLibName() <<
" stopped." << TLOG_ENDL;
// Error report for a plugin that failed to stop (presumably inside an exception handler
// whose catch clause is not visible).
482 TLOG_ERROR(
"MetricManager") <<
483 "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
484 metric->getLibName() << TLOG_ENDL;
487 TLOG_DEBUG(
"MetricManager") <<
"MetricManager has been stopped." << TLOG_ENDL;
void reinitialize(fhicl::ParameterSet const &pset, std::string prefix="")
Reinitialize all MetricPlugin Instances.
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins with a threshold level greater than or equal to level.
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a given MetricPlugin and return a pointer to it.
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting interval.
size_t metricQueueSize(std::string name="")
Return the size of the named metric queue
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
Report the sum of all values. Use for counters to report accurate results.
Small structure used to hold a metric data point before sending to the metric plugins ...
void do_resume()
Resume metric sending. Currently a No-Op.
bool metricQueueEmpty()
Returns whether the metric queue is completely empty
void do_pause()
Pause metric sending. Currently a No-Op.