artdaq_utilities  1.08.06
MetricManager.cc
1 // MetricManager.cc: MetricManager class implementation file
2 // Author: Eric Flumerfelt
3 // Last Modified: 11/14/2014
4 //
5 // MetricManager loads a user-specified set of plugins, sends them their configuration,
6 // and sends them data as it is received. It also maintains the state of the plugins
7 // relative to the application state.
8 
9 #include "TRACE/tracemf.h"
10 #define TRACE_NAME "MetricManager"
11 
12 #include "artdaq-utilities/Plugins/MetricManager.hh"
13 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
14 #include "fhiclcpp/ParameterSet.h"
15 
16 #include <pthread.h>
17 #include <boost/exception/all.hpp>
18 #include <chrono>
19 #include <memory>
20 
22  : metric_plugins_(0)
23  , system_metric_collector_(nullptr)
24  , initialized_(false)
25  , running_(false)
26  , active_(false)
27  , busy_(false)
28  , missed_metric_calls_(0)
29  , metric_calls_(0)
30 {
31  TLOG(TLVL_INFO) << "MetricManager CONSTRUCTOR";
32 }
33 
34 artdaq::MetricManager::~MetricManager() noexcept { shutdown(); }
35 
36 void artdaq::MetricManager::initialize(fhicl::ParameterSet const& pset, std::string const& prefix)
37 {
38  prefix_ = prefix;
39  if (initialized_)
40  {
41  shutdown();
42  }
43  TLOG(TLVL_INFO) << "Configuring metrics with parameter set: " << pset.to_string();
44 
45  std::vector<std::string> names = pset.get_names();
46 
47  metric_plugins_.clear();
48  bool send_system_metrics = false;
49  bool send_process_metrics = false;
50 
51  for (const auto& name : names)
52  {
53  if (name == "metric_queue_size")
54  {
55  metric_cache_max_size_ = pset.get<size_t>("metric_queue_size");
56  }
57  else if (name == "metric_queue_notify_size")
58  {
59  metric_cache_notify_size_ = pset.get<size_t>("metric_queue_notify_size");
60  }
61  else if (name == "metric_cache_size")
62  {
63  metric_cache_max_size_ = pset.get<size_t>("metric_cache_size");
64  }
65  else if (name == "metric_cache_notify_size")
66  {
67  metric_cache_notify_size_ = pset.get<size_t>("metric_cache_notify_size");
68  }
69  else if (name == "metric_send_maximum_delay_ms")
70  {
71  TLOG(TLVL_INFO) << "Setting metric_send_interval_ms_ to " << pset.get<int>("metric_send_maximum_delay_ms");
72  metric_send_interval_ms_ = pset.get<int>("metric_send_maximum_delay_ms");
73  }
74  else if (name == "metric_holdoff_us")
75  {
76  TLOG(TLVL_INFO) << "Setting metric_holdoff_us_ to " << pset.get<int>("metric_holdoff_us");
77  metric_holdoff_us_ = pset.get<int>("metric_holdoff_us");
78  }
79  else if (name == "send_system_metrics")
80  {
81  send_system_metrics = pset.get<bool>("send_system_metrics");
82  }
83  else if (name == "send_process_metrics")
84  {
85  send_process_metrics = pset.get<bool>("send_process_metrics");
86  }
87  else
88  {
89  try
90  {
91  TLOG(TLVL_DEBUG + 32) << "Constructing metric plugin with name " << name;
92  auto plugin_pset = pset.get<fhicl::ParameterSet>(name);
93  metric_plugins_.push_back(
94  makeMetricPlugin(plugin_pset.get<std::string>("metricPluginType", ""), plugin_pset, prefix_, name));
95  }
96  catch (const cet::exception& e)
97  {
98  TLOG(TLVL_ERROR) << "Exception caught in MetricManager::initialize, error loading plugin with name " << name
99  << ", cet::exception object caught:" << e.explain_self();
100  }
101  catch (const boost::exception& e)
102  {
103  TLOG(TLVL_ERROR) << "Exception caught in MetricManager::initialize, error loading plugin with name " << name
104  << ", boost::exception object caught: " << boost::diagnostic_information(e);
105  }
106  catch (const std::exception& e)
107  {
108  TLOG(TLVL_ERROR) << "Exception caught in MetricManager::initialize, error loading plugin with name " << name
109  << ", std::exception caught: " << e.what();
110  }
111  catch (...)
112  {
113  TLOG(TLVL_ERROR) << "Unknown Exception caught in MetricManager::initialize, error loading plugin with name "
114  << name;
115  }
116  }
117  }
118 
119  if (send_system_metrics || send_process_metrics)
120  {
121  system_metric_collector_ = std::make_unique<SystemMetricCollector>(send_process_metrics, send_system_metrics);
122  }
123 
124  initialized_ = true;
125 }
126 
128 {
129  std::lock_guard<std::mutex> lk(metric_mutex_);
130  if (!running_)
131  {
132  TLOG(TLVL_DEBUG + 32) << "Starting MetricManager";
133  for (auto& metric : metric_plugins_)
134  {
135  if (!metric)
136  {
137  continue;
138  }
139  try
140  {
141  metric->startMetrics();
142  TLOG(TLVL_INFO) << "Metric Plugin " << metric->getLibName() << " started.";
143  active_ = true;
144  }
145  catch (...)
146  {
147  TLOG(TLVL_ERROR) << "Exception caught in MetricManager::do_start(), error starting plugin with name "
148  << metric->getLibName();
149  }
150  }
151  running_ = true;
152  startMetricLoop_();
153  }
154 }
155 
157 {
158  std::unique_lock<std::mutex> lk(metric_mutex_);
159  TLOG(TLVL_DEBUG + 32) << "Stopping Metrics";
160  running_ = false;
161  metric_cv_.notify_all();
162  TLOG(TLVL_DEBUG + 32) << "Joining Metric-Sending thread";
163  lk.unlock();
164  try
165  {
166  if (metric_sending_thread_.joinable())
167  {
168  metric_sending_thread_.join();
169  }
170  }
171  catch (...)
172  {
173  // IGNORED
174  }
175  TLOG(TLVL_DEBUG + 32) << "do_stop Complete";
176 }
177 
179 { /*do_stop();*/
180 }
182 { /*do_start();*/
183 }
184 
185 void artdaq::MetricManager::reinitialize(fhicl::ParameterSet const& pset, std::string const& prefix)
186 {
187  shutdown();
188  initialize(pset, prefix);
189 }
190 
192 {
193  TRACE_STREAMER(TLVL_DEBUG + 32, TLOG2("MetricManager", 0), 0) << "MetricManager is shutting down..."; // Using TRACE_STREAMER in case MessageFacility is already gone
194  do_stop();
195 
196  std::lock_guard<std::mutex> lk(metric_mutex_);
197  if (initialized_)
198  {
199  TRACE_STREAMER(TLVL_DEBUG + 32, TLOG2("MetricManager", 0), 0) << "MetricManager is initialized shutting down...";
200  initialized_ = false;
201  for (auto& i : metric_plugins_)
202  {
203  try
204  {
205  std::string name = i->getLibName();
206  i.reset(nullptr);
207  TRACE_STREAMER(TLVL_DEBUG + 32, TLOG2("MetricManager", 0), 0) << "Metric Plugin " << name << " shutdown.";
208  }
209  catch (...)
210  {
211  TRACE_STREAMER(TLVL_ERROR, TLOG2("MetricManager", 0), 0) << "Exception caught in MetricManager::shutdown(), error shutting down metric with name "
212  << i->getLibName();
213  }
214  }
215  metric_plugins_.clear();
216  }
217 }
218 
219 void artdaq::MetricManager::sendMetric(std::string const& name, std::string const& value, std::string const& unit,
220  int level, MetricMode mode, std::string const& metricPrefix,
221  bool useNameOverride)
222 {
223  if (!initialized_)
224  {
225  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
226  {
227  TLOG(TLVL_WARNING) << "Attempted to send metric " << name << " when MetricManager has not yet been initialized!";
228  last_failure_ = std::chrono::steady_clock::now();
229  }
230  }
231  else if (!running_)
232  {
233  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
234  {
235  TLOG(TLVL_INFO) << "Attempted to send metric when MetricManager stopped!";
236  last_failure_ = std::chrono::steady_clock::now();
237  }
238  }
239  else if (active_)
240  {
241  {
242  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
243  metric_calls_++;
244  last_metric_received_ = std::chrono::steady_clock::now();
245  auto& cached = metric_cache_[name];
246  if (cached == nullptr)
247  {
248  metric_cache_[name] = std::make_unique<MetricData>(name, value, unit, level, mode, metricPrefix, useNameOverride);
249  }
250  else
251  {
252  auto size = cached->DataPointCount;
253  if (size < metric_cache_max_size_)
254  {
255  if (size >= metric_cache_notify_size_)
256  {
257  TLOG(TLVL_DEBUG + 35) << "Metric cache is at size " << size << " of " << metric_cache_max_size_ << " for metric " << name
258  << ".";
259  }
260  if (mode == MetricMode::LastPoint)
261  {
262  cached->StringValue = value;
263  cached->DataPointCount = 1;
264  }
265  else
266  {
267  cached->StringValue += " " + value;
268  cached->DataPointCount++;
269  }
270  }
271  else
272  {
273  TLOG(TLVL_DEBUG + 36) << "Rejecting metric because queue full";
274  missed_metric_calls_++;
275  }
276  }
277  }
278  metric_cv_.notify_all();
279  }
280 }
281 
282 void artdaq::MetricManager::sendMetric(std::string const& name, int const& value, std::string const& unit, int level,
283  MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
284 {
285  if (!initialized_)
286  {
287  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
288  {
289  TLOG(TLVL_WARNING) << "Attempted to send metric " << name << " when MetricManager has not yet been initialized!";
290  last_failure_ = std::chrono::steady_clock::now();
291  }
292  }
293  else if (!running_)
294  {
295  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
296  {
297  TLOG(TLVL_INFO) << "Attempted to send metric when MetricManager stopped!";
298  last_failure_ = std::chrono::steady_clock::now();
299  }
300  }
301  else if (active_)
302  {
303  {
304  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
305  metric_calls_++;
306  last_metric_received_ = std::chrono::steady_clock::now();
307  auto& cached = metric_cache_[name];
308  if (cached == nullptr)
309  {
310  metric_cache_[name] = std::make_unique<MetricData>(name, value, unit, level, mode, metricPrefix, useNameOverride);
311  }
312  else
313  {
314  auto size = cached->DataPointCount;
315  if (size < metric_cache_max_size_)
316  {
317  if (size >= metric_cache_notify_size_)
318  {
319  TLOG(TLVL_DEBUG + 35) << "Metric cache is at size " << size << " of " << metric_cache_max_size_ << " for metric " << name
320  << ".";
321  }
322  cached->AddPoint(value);
323  }
324  else
325  {
326  TLOG(TLVL_DEBUG + 36) << "Rejecting metric because queue full";
327  missed_metric_calls_++;
328  }
329  }
330  }
331  metric_cv_.notify_all();
332  }
333 }
334 
335 void artdaq::MetricManager::sendMetric(std::string const& name, double const& value, std::string const& unit, int level,
336  MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
337 {
338  if (!initialized_)
339  {
340  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
341  {
342  TLOG(TLVL_WARNING) << "Attempted to send metric " << name << " when MetricManager has not yet been initialized!";
343  last_failure_ = std::chrono::steady_clock::now();
344  }
345  }
346  else if (!running_)
347  {
348  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
349  {
350  TLOG(TLVL_INFO) << "Attempted to send metric when MetricManager stopped!";
351  last_failure_ = std::chrono::steady_clock::now();
352  }
353  }
354  else if (active_)
355  {
356  {
357  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
358  metric_calls_++;
359  last_metric_received_ = std::chrono::steady_clock::now();
360  auto& cached = metric_cache_[name];
361  if (cached == nullptr)
362  {
363  metric_cache_[name] = std::make_unique<MetricData>(name, value, unit, level, mode, metricPrefix, useNameOverride);
364  }
365  else
366  {
367  auto size = cached->DataPointCount;
368  if (size < metric_cache_max_size_)
369  {
370  if (size >= metric_cache_notify_size_)
371  {
372  TLOG(TLVL_DEBUG + 35) << "Metric cache is at size " << size << " of " << metric_cache_max_size_ << " for metric " << name
373  << ".";
374  }
375  cached->AddPoint(value);
376  }
377  else
378  {
379  TLOG(TLVL_DEBUG + 36) << "Rejecting metric because queue full";
380  missed_metric_calls_++;
381  }
382  }
383  }
384  metric_cv_.notify_all();
385  }
386 }
387 
388 void artdaq::MetricManager::sendMetric(std::string const& name, float const& value, std::string const& unit, int level,
389  MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
390 {
391  if (!initialized_)
392  {
393  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
394  {
395  TLOG(TLVL_WARNING) << "Attempted to send metric " << name << " when MetricManager has not yet been initialized!";
396  last_failure_ = std::chrono::steady_clock::now();
397  }
398  }
399  else if (!running_)
400  {
401  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
402  {
403  TLOG(TLVL_INFO) << "Attempted to send metric when MetricManager stopped!";
404  last_failure_ = std::chrono::steady_clock::now();
405  }
406  }
407  else if (active_)
408  {
409  {
410  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
411  metric_calls_++;
412  last_metric_received_ = std::chrono::steady_clock::now();
413  auto& cached = metric_cache_[name];
414  if (cached == nullptr)
415  {
416  metric_cache_[name] = std::make_unique<MetricData>(name, value, unit, level, mode, metricPrefix, useNameOverride);
417  }
418  else
419  {
420  auto size = cached->DataPointCount;
421  if (size < metric_cache_max_size_)
422  {
423  if (size >= metric_cache_notify_size_)
424  {
425  TLOG(TLVL_DEBUG + 35) << "Metric cache is at size " << size << " of " << metric_cache_max_size_ << " for metric " << name
426  << ".";
427  }
428  cached->AddPoint(value);
429  }
430  else
431  {
432  TLOG(TLVL_DEBUG + 36) << "Rejecting metric because queue full";
433  missed_metric_calls_++;
434  }
435  }
436  }
437  metric_cv_.notify_all();
438  }
439 }
440 
441 void artdaq::MetricManager::sendMetric(std::string const& name, uint64_t const& value, std::string const& unit,
442  int level, MetricMode mode, std::string const& metricPrefix,
443  bool useNameOverride)
444 {
445  if (!initialized_)
446  {
447  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
448  {
449  TLOG(TLVL_WARNING) << "Attempted to send metric " << name << " when MetricManager has not yet been initialized!";
450  last_failure_ = std::chrono::steady_clock::now();
451  }
452  }
453  else if (!running_)
454  {
455  if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_failure_).count() > 1000)
456  {
457  TLOG(TLVL_INFO) << "Attempted to send metric when MetricManager stopped!";
458  last_failure_ = std::chrono::steady_clock::now();
459  }
460  }
461  else if (active_)
462  {
463  {
464  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
465  metric_calls_++;
466  last_metric_received_ = std::chrono::steady_clock::now();
467  auto& cached = metric_cache_[name];
468  if (cached == nullptr)
469  {
470  metric_cache_[name] = std::make_unique<MetricData>(name, value, unit, level, mode, metricPrefix, useNameOverride);
471  }
472  else
473  {
474  auto size = cached->DataPointCount;
475  if (size < metric_cache_max_size_)
476  {
477  if (size >= metric_cache_notify_size_)
478  {
479  TLOG(TLVL_DEBUG + 35) << "Metric cache is at size " << size << " of " << metric_cache_max_size_ << " for metric " << name
480  << ".";
481  }
482  cached->AddPoint(value);
483  }
484  else
485  {
486  TLOG(TLVL_DEBUG + 36) << "Rejecting metric because queue full";
487  missed_metric_calls_++;
488  }
489  }
490  }
491  metric_cv_.notify_all();
492  }
493 }
494 
495 void artdaq::MetricManager::startMetricLoop_()
496 {
497  if (metric_sending_thread_.joinable())
498  {
499  metric_sending_thread_.join();
500  }
501  boost::thread::attributes attrs;
502  attrs.set_stack_size(4096 * 2000); // 8000 KB
503  TLOG(TLVL_INFO) << "Starting Metric Sending Thread";
504  try
505  {
506  metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_, this));
507 
508  char tname[16]; // Size 16 - see man page pthread_setname_np(3) and/or prctl(2)
509  snprintf(tname, sizeof(tname) - 1, "%s", "MetricSend"); // NOLINT
510  tname[sizeof(tname) - 1] = '\0'; // assure term. snprintf is not too evil :)
511  auto handle = metric_sending_thread_.native_handle();
512  pthread_setname_np(handle, tname);
513  }
514  catch (const boost::exception& e)
515  {
516  TLOG(TLVL_ERROR) << "Caught boost::exception starting Metric Sending thread: " << boost::diagnostic_information(e)
517  << ", errno=" << errno;
518  std::cerr << "Caught boost::exception starting Metric Sending thread: " << boost::diagnostic_information(e)
519  << ", errno=" << errno << std::endl;
520  exit(5);
521  }
522  TLOG(TLVL_INFO) << "Metric Sending thread started";
523 }
524 
526 {
527  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
528  for (auto& cache_entry : metric_cache_)
529  {
530  if (cache_entry.second->DataPointCount > 0)
531  {
532  return false;
533  }
534  }
535 
536  return true;
537 }
538 
540 {
541  bool pluginsBusy = false;
542 
543  for (auto& p : metric_plugins_)
544  {
545  if (p->metricsPending())
546  {
547  pluginsBusy = true;
548  break;
549  }
550  }
551 
552  TLOG(TLVL_DEBUG + 33) << "Metric queue empty: " << metricQueueEmpty() << ", busy_: " << busy_ << ", Plugins busy: " << pluginsBusy;
553  return !metricQueueEmpty() || busy_ || pluginsBusy;
554 }
555 
556 size_t artdaq::MetricManager::metricQueueSize(std::string const& name)
557 {
558  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
559  size_t size = 0;
560  if (name.empty())
561  {
562  for (auto& q : metric_cache_)
563  {
564  size += q.second->DataPointCount;
565  }
566  }
567  else
568  {
569  if (metric_cache_.count(name) != 0u)
570  {
571  size = metric_cache_[name]->DataPointCount;
572  }
573  }
574 
575  return size;
576 }
577 
578 void artdaq::MetricManager::sendMetricLoop_()
579 {
580  TLOG(TLVL_INFO) << "sendMetricLoop_ START";
581  auto last_send_time = std::chrono::steady_clock::time_point();
582  while (running_)
583  {
584  TLOG(TLVL_DEBUG + 34) << "sendMetricLoop_: Entering Metric input wait loop";
585  while (metricQueueEmpty() && running_)
586  {
587  std::unique_lock<std::mutex> lk(metric_mutex_);
588  metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
589  auto now = std::chrono::steady_clock::now();
590  if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() >
591  metric_send_interval_ms_)
592  {
593  TLOG(TLVL_DEBUG + 34) << "sendMetricLoop_: Metric send interval exceeded: Sending metrics";
594  {
595  std::unique_lock<std::mutex> lk(metric_cache_mutex_); // last_metric_received_ is protected by metric_cache_mutex_
596  if (std::chrono::duration_cast<std::chrono::microseconds>(now - last_metric_received_).count() < metric_holdoff_us_)
597  {
598  lk.unlock();
599  usleep(metric_holdoff_us_);
600  }
601  }
602  for (auto& metric : metric_plugins_)
603  {
604  if (metric)
605  {
606  metric->sendMetrics();
607  }
608  }
609  last_send_time = now;
610  }
611  }
612  {
613  std::unique_lock<std::mutex> lk(metric_cache_mutex_); // last_metric_received_ is protected by metric_cache_mutex_
614  if (std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_metric_received_).count() < metric_holdoff_us_)
615  {
616  lk.unlock();
617  usleep(metric_holdoff_us_);
618  }
619  }
620 
621  TLOG(TLVL_DEBUG + 34) << "sendMetricLoop_: After Metric input wait loop";
622  busy_ = true;
623  auto processing_start = std::chrono::steady_clock::now();
624  auto temp_list = std::list<std::unique_ptr<MetricData>>();
625  {
626  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
627 
628  for (auto& q : metric_cache_)
629  {
630  if (q.second != nullptr && q.second->DataPointCount > 0)
631  {
632  temp_list.emplace_back(new MetricData(*q.second));
633  q.second->Reset();
634  }
635  }
636  }
637 
638  auto calls = metric_calls_.exchange(0);
639  temp_list.emplace_back(
640  new MetricData("Metric Calls", calls, "metrics", 4, MetricMode::Accumulate | MetricMode::Rate, "", false));
641 
642  auto missed = missed_metric_calls_.exchange(0);
643  temp_list.emplace_back(
644  new MetricData("Missed Metric Calls", missed, "metrics", 4, MetricMode::Accumulate | MetricMode::Rate, "", false));
645 
646  TLOG(TLVL_DEBUG + 33) << "There are " << temp_list.size() << " Metrics to process (" << calls << " calls, " << missed
647  << " missed)";
648 
649  if (system_metric_collector_ != nullptr)
650  {
651  TLOG(TLVL_DEBUG + 33) << "Collecting System metrics (CPU, RAM, Network)";
652  auto systemMetrics = system_metric_collector_->SendMetrics();
653  for (auto& m : systemMetrics) { temp_list.emplace_back(std::move(m)); }
654  }
655 
656  TLOG(TLVL_DEBUG + 34) << "sendMetricLoop_: Before processing temp_list";
657  while (!temp_list.empty())
658  {
659  auto data_ = std::move(temp_list.front());
660  temp_list.pop_front();
661  if (data_->Type == MetricType::InvalidMetric)
662  {
663  continue;
664  }
665  if (!data_->UseNameOverride)
666  {
667  if (!data_->MetricPrefix.empty())
668  {
669  data_->Name = prefix_ + "." + data_->MetricPrefix + "." + data_->Name;
670  }
671  else
672  {
673  data_->Name = prefix_ + "." + data_->Name;
674  }
675  }
676 
677  for (auto& metric : metric_plugins_)
678  {
679  if (!metric)
680  {
681  continue;
682  }
683  if (metric->IsLevelEnabled(data_->Level))
684  {
685  try
686  {
687  metric->addMetricData(data_);
688  last_send_time = std::chrono::steady_clock::now();
689  }
690  catch (...)
691  {
692  TLOG(TLVL_ERROR) << "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
693  << metric->getLibName();
694  }
695  }
696  }
697  }
698 
699  TLOG(TLVL_DEBUG + 34) << "sendMetricLoop_: Before sending metrics";
700  for (auto& metric : metric_plugins_)
701  {
702  if (!metric)
703  {
704  continue;
705  }
706  metric->sendMetrics(false, processing_start);
707  }
708 
709  // Limit rate of metrics going to plugins
710  TLOG(TLVL_DEBUG + 34) << "sendMetricLoop_: End of working loop";
711  busy_ = false;
712  usleep(10000);
713  }
714 
715  busy_ = true;
716  auto temp_list = std::list<std::unique_ptr<MetricData>>();
717  {
718  std::lock_guard<std::mutex> lk(metric_cache_mutex_);
719 
720  for (auto& q : metric_cache_)
721  {
722  if (q.second != nullptr && q.second->DataPointCount > 0)
723  {
724  temp_list.emplace_back(new MetricData(*q.second));
725  q.second->Reset();
726  }
727  }
728  // metric_cache_.clear();
729  }
730 
731  auto calls = metric_calls_.exchange(0);
732  temp_list.emplace_back(
733  new MetricData("Metric Calls", calls, "metrics", 4, MetricMode::Accumulate | MetricMode::Rate, "", false));
734 
735  auto missed = missed_metric_calls_.exchange(0);
736  temp_list.emplace_back(
737  new MetricData("Missed Metric Calls", missed, "metrics", 4, MetricMode::Accumulate | MetricMode::Rate, "", false));
738 
739  TLOG(TLVL_DEBUG + 33) << "There are " << temp_list.size() << " Metrics to process (" << calls << " calls, " << missed
740  << " missed)";
741 
742  while (!temp_list.empty())
743  {
744  auto data_ = std::move(temp_list.front());
745  temp_list.pop_front();
746  if (data_->Type == MetricType::InvalidMetric)
747  {
748  continue;
749  }
750  if (!data_->UseNameOverride)
751  {
752  if (!data_->MetricPrefix.empty())
753  {
754  data_->Name = prefix_ + "." + data_->MetricPrefix + "." + data_->Name;
755  }
756  else
757  {
758  data_->Name = prefix_ + "." + data_->Name;
759  }
760  }
761 
762  for (auto& metric : metric_plugins_)
763  {
764  if (!metric)
765  {
766  continue;
767  }
768  if (metric->IsLevelEnabled(data_->Level))
769  {
770  try
771  {
772  metric->addMetricData(data_);
773  last_send_time = std::chrono::steady_clock::now();
774  }
775  catch (...)
776  {
777  TLOG(TLVL_ERROR) << "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
778  << metric->getLibName();
779  }
780  }
781  }
782  }
783 
784  for (auto& metric : metric_plugins_)
785  {
786  if (!metric)
787  {
788  continue;
789  }
790  try
791  {
792  metric->stopMetrics();
793  TLOG(TLVL_DEBUG + 32) << "Metric Plugin " << metric->getLibName() << " stopped.";
794  }
795  catch (...)
796  {
797  TLOG(TLVL_ERROR) << "Exception caught in MetricManager::do_stop(), error stopping plugin with name "
798  << metric->getLibName();
799  }
800  }
801  busy_ = false;
802  TLOG(TLVL_DEBUG + 32) << "MetricManager has been stopped.";
803 }
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string const &prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins with a threshold level >= to level...
bool metricManagerBusy()
Determine whether the MetricManager or any of its plugins are currently processing metrics...
size_t metricQueueSize(std::string const &name="")
Return the size of the named metric queue
void reinitialize(fhicl::ParameterSet const &pset, std::string const &prefix="")
Reinitialize all MetricPlugin Instances.
Report the sum of all values. Use for counters to report accurate results.
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
over. Use to create rates from counters.
Report only the last value recorded. Useful for event counters, run numbers, etc. ...
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting int...
Definition: MetricData.hh:27
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps, std::string const &app_name, std::string const &metric_name)
Load a given MetricPlugin and return a pointer to it.
Default, invalid value.
void do_resume()
Resume metric sending. Currently a No-Op.
bool metricQueueEmpty()
Returns whether the metric queue is completely empty
void do_pause()
Pause metric sending. Currently a No-Op.