artdaq_utilities  v1_04_01
MetricManager.cc
1 // MetricManager.cc: MetricManager class implementation file
2 // Author: Eric Flumerfelt
3 // Last Modified: 11/14/2014
4 //
5 // MetricManager loads a user-specified set of plugins, sends them their configuration,
6 // and sends them data as it is received. It also maintains the state of the plugins
7 // relative to the application state.
8 
9 #define TRACE_NAME "MetricManager"
10 #include "tracemf.h"
11 #include "artdaq-utilities/Plugins/MetricManager.hh"
12 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
13 #include "fhiclcpp/ParameterSet.h"
14 
15 #include <chrono>
16 #include <boost/exception/all.hpp>
17 
19 MetricManager() : metric_plugins_(0)
20 , initialized_(false)
21 , running_(false)
22 , active_(false)
23 , missed_metric_calls_(0)
24 , metric_queue_max_size_(1000)
25 , metric_queue_notify_size_(10)
26 {}
27 
29 {
30  shutdown();
31 }
32 
33 void artdaq::MetricManager::initialize(fhicl::ParameterSet const& pset, std::string prefix)
34 {
35  prefix_ = prefix;
36  if (initialized_)
37  {
38  shutdown();
39  }
40  TLOG_INFO("MetricManager") << "Configuring metrics with parameter set:\n" << pset.to_string() << TLOG_ENDL;
41 
42  std::vector<std::string> names = pset.get_pset_names();
43 
44  for (auto name : names)
45  {
46  if (name == "metric_queue_size")
47  {
48  metric_queue_max_size_ = pset.get<size_t>("metric_queue_size");
49  }
50  else if (name == "metric_queue_notify_size")
51  {
52  metric_queue_notify_size_ = pset.get<size_t>("metric_queue_notify_size");
53  }
54  else
55  {
56  try
57  {
58  TLOG_DEBUG("MetricManager") << "Constructing metric plugin with name " << name << TLOG_ENDL;
59  fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
60  metric_plugins_.push_back(makeMetricPlugin(
61  plugin_pset.get<std::string>("metricPluginType", ""), plugin_pset));
62  }
63  catch (const cet::exception& e)
64  {
65  TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
66  ", cet::exception object caught:" << e.explain_self() << TLOG_ENDL;
67  }
68  catch (const boost::exception& e)
69  {
70  TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
71  ", boost::exception object caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
72  }
73  catch (const std::exception& e)
74  {
75  TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
76  ", std::exception caught: " << e.what() << TLOG_ENDL;
77  }
78  catch (...)
79  {
80  TLOG_ERROR("MetricManager") << "Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name << TLOG_ENDL;
81  }
82  }
83  }
84 
85  initialized_ = true;
86 }
87 
89 {
90  if (!running_)
91  {
92  TLOG_DEBUG("MetricManager") << "Starting MetricManager" << TLOG_ENDL;
93  for (auto& metric : metric_plugins_)
94  {
95  try
96  {
97  metric->startMetrics();
98  TLOG_INFO("MetricManager") << "Metric Plugin " << metric->getLibName() << " started." << TLOG_ENDL;
99  active_ = true;
100  }
101  catch (...)
102  {
103  TLOG_ERROR("MetricManager") <<
104  "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
105  metric->getLibName() << TLOG_ENDL;
106  }
107  }
108  running_ = true;
109  startMetricLoop_();
110  }
111 }
112 
114 {
115  TLOG_DEBUG("MetricManager") << "Stopping Metrics" << TLOG_ENDL;
116  running_ = false;
117  metric_cv_.notify_all();
118  TLOG_DEBUG("MetricManager") << "Joining Metric-Sending thread" << TLOG_ENDL;
119  if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
120  TLOG_DEBUG("MetricManager") << "do_stop Complete" << TLOG_ENDL;
121 }
122 
123 void artdaq::MetricManager::do_pause() { /*do_stop();*/ }
124 void artdaq::MetricManager::do_resume() { /*do_start();*/ }
125 
126 void artdaq::MetricManager::reinitialize(fhicl::ParameterSet const& pset, std::string prefix)
127 {
128  shutdown();
129  initialize(pset, prefix);
130 }
131 
133 {
134  TLOG_DEBUG("MetricManager") << "MetricManager is shutting down..." << TLOG_ENDL;
135  do_stop();
136 
137  if (initialized_)
138  {
139  for (auto& i : metric_plugins_)
140  {
141  try
142  {
143  std::string name = i->getLibName();
144  i.reset(nullptr);
145  TLOG_DEBUG("MetricManager") << "Metric Plugin " << name << " shutdown." << TLOG_ENDL;
146  }
147  catch (...)
148  {
149  TLOG_ERROR("MetricManager") <<
150  "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
151  i->getLibName() << TLOG_ENDL;
152  }
153  }
154  initialized_ = false;
155  }
156 }
157 
158 void artdaq::MetricManager::sendMetric(std::string const& name, std::string const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
159 {
160  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
161  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
162  else if (active_)
163  {
164  if (!metric_queue_.count(name)) {
165  metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
166  }
167  auto entry = &(metric_queue_[name]);
168 
169  auto size = entry->first;
170  if (size < metric_queue_max_size_)
171  {
172  if (size >= metric_queue_notify_size_) TLOG_ARB(9, "MetricManager") << "Metric queue is at size " << size << " of " << metric_queue_max_size_ << "." << TLOG_ENDL;
173  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
174  {
175  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
176  entry->first++;
177  entry->second.emplace_back(std::move(metric));
178  }
179  }
180  else
181  {
182  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
183  missed_metric_calls_++;
184  }
185  metric_cv_.notify_all();
186  }
187 }
188 
189 void artdaq::MetricManager::sendMetric(std::string const& name, int const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
190 {
191  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
192  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
193  else if (active_)
194  {
195  if (!metric_queue_.count(name)) {
196  metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
197  }
198  auto entry = &(metric_queue_[name]);
199 
200  auto size = entry->first;
201  if (size < metric_queue_max_size_)
202  {
203  if (size >= metric_queue_notify_size_) TLOG_ARB(9, "MetricManager") << "Metric queue is at size " << size << " of " << metric_queue_max_size_ << "." << TLOG_ENDL;
204  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
205  {
206  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
207  entry->first++;
208  entry->second.emplace_back(std::move(metric));
209  }
210  }
211  else
212  {
213  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
214  missed_metric_calls_++;
215  }
216  metric_cv_.notify_all();
217  }
218 }
219 
220 void artdaq::MetricManager::sendMetric(std::string const& name, double const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
221 {
222  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
223  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
224  else if (active_)
225  {
226  if (!metric_queue_.count(name)) {
227  metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
228  }
229  auto entry = &(metric_queue_[name]);
230 
231  auto size = entry->first;
232  if (size < metric_queue_max_size_)
233  {
234  if (size >= metric_queue_notify_size_) TLOG_ARB(9, "MetricManager") << "Metric queue is at size " << size << " of " << metric_queue_max_size_ << "." << TLOG_ENDL;
235  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
236  {
237  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
238  entry->first++;
239  entry->second.emplace_back(std::move(metric));
240  }
241  }
242  else
243  {
244  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
245  missed_metric_calls_++;
246  }
247  metric_cv_.notify_all();
248  }
249 }
250 
251 void artdaq::MetricManager::sendMetric(std::string const& name, float const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
252 {
253  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
254  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
255  else if (active_)
256  {
257  if (!metric_queue_.count(name)) {
258  metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
259  }
260  auto entry = &(metric_queue_[name]);
261 
262  auto size = entry->first;
263  if (size < metric_queue_max_size_)
264  {
265  if (size >= metric_queue_notify_size_) TLOG_ARB(9, "MetricManager") << "Metric queue is at size " << size << " of " << metric_queue_max_size_ << "." << TLOG_ENDL;
266  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
267  {
268  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
269  entry->first++;
270  entry->second.emplace_back(std::move(metric));
271  }
272  }
273  else
274  {
275  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
276  missed_metric_calls_++;
277  }
278  metric_cv_.notify_all();
279  }
280 }
281 
282 void artdaq::MetricManager::sendMetric(std::string const& name, long unsigned int const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
283 {
284  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
285  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
286  else if (active_)
287  {
288  if (!metric_queue_.count(name)) {
289  metric_queue_[name] = std::make_pair<size_t, std::list<metric_data_ptr>>(0, std::list<metric_data_ptr>());
290  }
291  auto entry = &(metric_queue_[name]);
292 
293  auto size = entry->first;
294  if (size < metric_queue_max_size_)
295  {
296  if (size >= metric_queue_notify_size_) TLOG_ARB(9, "MetricManager") << "Metric queue is at size " << size << " of " << metric_queue_max_size_ << "." << TLOG_ENDL;
297  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
298  {
299  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
300  entry->first++;
301  entry->second.emplace_back(std::move(metric));
302  }
303  }
304  else
305  {
306  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
307  missed_metric_calls_++;
308  }
309  metric_cv_.notify_all();
310  }
311 }
312 
313 void artdaq::MetricManager::startMetricLoop_()
314 {
315  if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
316  TLOG_INFO("MetricManager") << "Starting Metric Sending Thread" << TLOG_ENDL;
317  boost::thread::attributes attrs;
318  attrs.set_stack_size(4096 * 200); // 800 KB
319  metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_, this));
320 }
321 
323 {
324  for (auto& q : metric_queue_)
325  {
326  if (q.second.first != 0) return false;
327  }
328  return true;
329 }
330 
332 {
333  size_t size = 0;
334  if (name == "") {
335 
336  }
337  else {
338  if (metric_queue_.count(name)) size = metric_queue_[name].first;
339  }
340 
341  return size;
342 }
343 
344 void artdaq::MetricManager::sendMetricLoop_()
345 {
346  auto last_send_time = std::chrono::steady_clock::time_point();
347  while (running_)
348  {
349  while (metricQueueEmpty() && running_)
350  {
351  std::unique_lock<std::mutex> lk(metric_mutex_);
352  metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
353  auto now = std::chrono::steady_clock::now();
354  if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
355  {
356  for (auto& metric : metric_plugins_) { metric->sendMetrics(); }
357  last_send_time = now;
358  }
359  }
360 
361  auto temp_list = std::list<std::unique_ptr<MetricData>>();
362  {
363  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
364 
365  for (auto& q : metric_queue_)
366  {
367  temp_list.splice(temp_list.end(), q.second.second);
368  q.second.first = 0;
369  }
370 
371  temp_list.emplace_back(new MetricData("Metric Calls", temp_list.size(), "metrics", 4, MetricMode::Accumulate, "", false));
372  auto missed = missed_metric_calls_.exchange(0);
373 
374  temp_list.emplace_back(new MetricData("Missed Metric Calls", missed, "metrics", 4, MetricMode::Accumulate, "", false));
375  TLOG_TRACE("MetricManager") << "There are " << temp_list.size() << " Metric Calls to process (missed " << missed << ")" << TLOG_ENDL;
376  }
377 
378  while (temp_list.size() > 0)
379  {
380  auto data_ = std::move(temp_list.front());
381  temp_list.pop_front();
382  if (data_->Type == MetricType::InvalidMetric) continue;
383  if (!data_->UseNameOverride)
384  {
385  if (data_->MetricPrefix.size() > 0)
386  {
387  data_->Name = prefix_ + "." + data_->MetricPrefix + "." + data_->Name;
388  }
389  else
390  {
391  data_->Name = prefix_ + "." + data_->Name;
392  }
393  }
394 
395  for (auto& metric : metric_plugins_)
396  {
397  if (metric->getRunLevel() >= data_->Level)
398  {
399  try
400  {
401  metric->addMetricData(*data_);
402  last_send_time = std::chrono::steady_clock::now();
403  }
404  catch (...)
405  {
406  TLOG_ERROR("MetricManager") <<
407  "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
408  << metric->getLibName() << TLOG_ENDL;
409  }
410  }
411  }
412  }
413 
414  for (auto& metric : metric_plugins_)
415  {
416  metric->sendMetrics();
417  }
418  }
419 
420  auto temp_list = std::list<std::unique_ptr<MetricData>>();
421  {
422  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
423 
424  for (auto& q : metric_queue_)
425  {
426  temp_list.splice(temp_list.end(), q.second.second);
427  }
428  metric_queue_.clear();
429 
430  temp_list.emplace_back(new MetricData("Metric Calls", temp_list.size(), "metrics", 4, MetricMode::Accumulate, "", false));
431  auto missed = missed_metric_calls_.exchange(0);
432 
433  temp_list.emplace_back(new MetricData("Missed Metric Calls", missed, "metrics", 4, MetricMode::Accumulate, "", false));
434  TLOG_TRACE("MetricManager") << "There are " << temp_list.size() << " Metric Calls to process (missed " << missed << ")" << TLOG_ENDL;
435  }
436 
437  while (temp_list.size() > 0)
438  {
439  auto data_ = std::move(temp_list.front());
440  temp_list.pop_front();
441  if (data_->Type == MetricType::InvalidMetric) continue;
442  if (!data_->UseNameOverride)
443  {
444  if (data_->MetricPrefix.size() > 0)
445  {
446  data_->Name = prefix_ + "." + data_->MetricPrefix + "." + data_->Name;
447  }
448  else
449  {
450  data_->Name = prefix_ + "." + data_->Name;
451  }
452  }
453 
454  for (auto& metric : metric_plugins_)
455  {
456  if (metric->getRunLevel() >= data_->Level)
457  {
458  try
459  {
460  metric->addMetricData(*data_);
461  last_send_time = std::chrono::steady_clock::now();
462  }
463  catch (...)
464  {
465  TLOG_ERROR("MetricManager") <<
466  "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
467  << metric->getLibName() << TLOG_ENDL;
468  }
469  }
470  }
471  }
472 
473  for (auto& metric : metric_plugins_)
474  {
475  try
476  {
477  metric->stopMetrics();
478  TLOG_DEBUG("MetricManager") << "Metric Plugin " << metric->getLibName() << " stopped." << TLOG_ENDL;
479  }
480  catch (...)
481  {
482  TLOG_ERROR("MetricManager") <<
483  "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
484  metric->getLibName() << TLOG_ENDL;
485  }
486  }
487  TLOG_DEBUG("MetricManager") << "MetricManager has been stopped." << TLOG_ENDL;
488 }
void reinitialize(fhicl::ParameterSet const &pset, std::string prefix="")
Reinitialize all MetricPlugin Instances.
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins with a threshold level >= to level...
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a given MetricPlugin and return a pointer to it.
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting int...
Definition: MetricData.hh:31
size_t metricQueueSize(std::string name="")
Return the size of the named metric queue
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
Report the sum of all values. Use for counters to report accurate results.
Small structure used to hold a metric data point before sending to the metric plugins ...
Definition: MetricData.hh:41
Default, invalid value.
void do_resume()
Resume metric sending. Currently a No-Op.
bool metricQueueEmpty()
Returns whether the metric queue is completely empty
void do_pause()
Pause metric sending. Currently a No-Op.