artdaq_utilities  v1_04_00
MetricManager.cc
1 // MetricManager.cc: MetricManager class implementation file
2 // Author: Eric Flumerfelt
3 // Last Modified: 11/14/2014
4 //
5 // MetricManager loads a user-specified set of plugins, sends them their configuration,
6 // and sends them data as it is received. It also maintains the state of the plugins
7 // relative to the application state.
8 
9 #define TRACE_NAME "MetricManager"
10 #include "tracemf.h"
11 #include "artdaq-utilities/Plugins/MetricManager.hh"
12 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
13 #include "fhiclcpp/ParameterSet.h"
14 
15 #include <chrono>
16 #include <boost/exception/all.hpp>
17 
19 MetricManager() : metric_plugins_(0)
20 , initialized_(false)
21 , running_(false)
22 , active_(false)
23 , missed_metric_calls_(0)
24 , metric_queue_max_size_(10000)
25 {}
26 
28 {
29  shutdown();
30 }
31 
32 void artdaq::MetricManager::initialize(fhicl::ParameterSet const& pset, std::string prefix)
33 {
34  prefix_ = prefix;
35  if (initialized_)
36  {
37  shutdown();
38  }
39  TLOG_INFO("MetricManager") << "Configuring metrics with parameter set:\n" << pset.to_string() << TLOG_ENDL;
40 
41  std::vector<std::string> names = pset.get_pset_names();
42 
43  for (auto name : names)
44  {
45  if (name == "metric_queue_size")
46  {
47  metric_queue_max_size_ = pset.get<size_t>("metric_queue_size");
48  }
49  else
50  {
51  try
52  {
53  TLOG_DEBUG("MetricManager") << "Constructing metric plugin with name " << name << TLOG_ENDL;
54  fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
55  metric_plugins_.push_back(makeMetricPlugin(
56  plugin_pset.get<std::string>("metricPluginType", ""), plugin_pset));
57  }
58  catch (const cet::exception& e)
59  {
60  TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
61  ", cet::exception object caught:" << e.explain_self() << TLOG_ENDL;
62  }
63  catch (const boost::exception& e)
64  {
65  TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
66  ", boost::exception object caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
67  }
68  catch (const std::exception& e)
69  {
70  TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
71  ", std::exception caught: " << e.what() << TLOG_ENDL;
72  }
73  catch (...)
74  {
75  TLOG_ERROR("MetricManager") << "Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name << TLOG_ENDL;
76  }
77  }
78  }
79 
80  initialized_ = true;
81 }
82 
84 {
85  if (!running_)
86  {
87  TLOG_DEBUG("MetricManager") << "Starting MetricManager" << TLOG_ENDL;
88  for (auto& metric : metric_plugins_)
89  {
90  try
91  {
92  metric->startMetrics();
93  TLOG_INFO("MetricManager") << "Metric Plugin " << metric->getLibName() << " started." << TLOG_ENDL;
94  active_ = true;
95  }
96  catch (...)
97  {
98  TLOG_ERROR("MetricManager") <<
99  "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
100  metric->getLibName() << TLOG_ENDL;
101  }
102  }
103  running_ = true;
104  startMetricLoop_();
105  }
106 }
107 
109 {
110  TLOG_DEBUG("MetricManager") << "Stopping Metrics" << TLOG_ENDL;
111  running_ = false;
112  metric_cv_.notify_all();
113  TLOG_DEBUG("MetricManager") << "Joining Metric-Sending thread" << TLOG_ENDL;
114  if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
115  TLOG_DEBUG("MetricManager") << "do_stop Complete" << TLOG_ENDL;
116 }
117 
118 void artdaq::MetricManager::do_pause() { /*do_stop();*/ }
119 void artdaq::MetricManager::do_resume() { /*do_start();*/ }
120 
121 void artdaq::MetricManager::reinitialize(fhicl::ParameterSet const& pset, std::string prefix)
122 {
123  shutdown();
124  initialize(pset, prefix);
125 }
126 
128 {
129  TLOG_DEBUG("MetricManager") << "MetricManager is shutting down..." << TLOG_ENDL;
130  do_stop();
131 
132  if (initialized_)
133  {
134  for (auto& i : metric_plugins_)
135  {
136  try
137  {
138  std::string name = i->getLibName();
139  i.reset(nullptr);
140  TLOG_DEBUG("MetricManager") << "Metric Plugin " << name << " shutdown." << TLOG_ENDL;
141  }
142  catch (...)
143  {
144  TLOG_ERROR("MetricManager") <<
145  "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
146  i->getLibName() << TLOG_ENDL;
147  }
148  }
149  initialized_ = false;
150  }
151 }
152 
153 void artdaq::MetricManager::sendMetric(std::string const& name, std::string const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
154 {
155  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
156  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
157  else if (active_)
158  {
159  if (metric_queue_.size() < metric_queue_max_size_)
160  {
161  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
162  {
163  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
164  metric_queue_.emplace(std::move(metric));
165  }
166  }
167  else
168  {
169  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
170  missed_metric_calls_++;
171  }
172  metric_cv_.notify_all();
173  }
174 }
175 
176 void artdaq::MetricManager::sendMetric(std::string const& name, int const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
177 {
178  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
179  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
180  else if (active_)
181  {
182  if (metric_queue_.size() < metric_queue_max_size_)
183  {
184  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
185  {
186  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
187  metric_queue_.emplace(std::move(metric));
188  }
189  }
190  else
191  {
192  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
193  missed_metric_calls_++;
194  }
195  metric_cv_.notify_all();
196  }
197 }
198 
199 void artdaq::MetricManager::sendMetric(std::string const& name, double const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
200 {
201  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
202  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
203  else if (active_)
204  {
205  if (metric_queue_.size() < metric_queue_max_size_)
206  {
207  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
208  {
209  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
210  metric_queue_.emplace(std::move(metric));
211  }
212  }
213  else
214  {
215  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
216  missed_metric_calls_++;
217  }
218  metric_cv_.notify_all();
219  }
220 }
221 
222 void artdaq::MetricManager::sendMetric(std::string const& name, float const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
223 {
224  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
225  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
226  else if (active_)
227  {
228  if (metric_queue_.size() < metric_queue_max_size_)
229  {
230  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
231  {
232  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
233  metric_queue_.emplace(std::move(metric));
234  }
235  }
236  else
237  {
238  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
239  missed_metric_calls_++;
240  }
241  metric_cv_.notify_all();
242  }
243 }
244 
245 void artdaq::MetricManager::sendMetric(std::string const& name, long unsigned int const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
246 {
247  if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
248  else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
249  else if (active_)
250  {
251  if (metric_queue_.size() < metric_queue_max_size_)
252  {
253  std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
254  {
255  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
256  metric_queue_.emplace(std::move(metric));
257  }
258  }
259  else
260  {
261  TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
262  missed_metric_calls_++;
263  }
264  metric_cv_.notify_all();
265  }
266 }
267 
268 void artdaq::MetricManager::startMetricLoop_()
269 {
270  if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
271  TLOG_INFO("MetricManager") << "Starting Metric Sending Thread" << TLOG_ENDL;
272  boost::thread::attributes attrs;
273  attrs.set_stack_size(4096 * 200); // 800 KB
274  metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_, this));
275 }
276 
277 void artdaq::MetricManager::sendMetricLoop_()
278 {
279  auto last_send_time = std::chrono::steady_clock::time_point();
280  while (running_)
281  {
282  while (metric_queue_.empty() && running_)
283  {
284  std::unique_lock<std::mutex> lk(metric_mutex_);
285  metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
286  auto now = std::chrono::steady_clock::now();
287  if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
288  {
289  for (auto& metric : metric_plugins_) { metric->sendMetrics(); }
290  last_send_time = now;
291  }
292  }
293 
294  auto temp_list = std::queue<std::unique_ptr<MetricData>>();
295  {
296  std::unique_lock<std::mutex> lk(metric_queue_mutex_);
297  temp_list.swap(metric_queue_);
298  temp_list.emplace(new MetricData("Metric Calls", temp_list.size(), "metrics", 4, MetricMode::Accumulate, "", false));
299  auto missed = missed_metric_calls_.exchange(0);
300 
301  temp_list.emplace(new MetricData("Missed Metric Calls", missed, "metrics", 4, MetricMode::Accumulate, "", false));
302  TLOG_TRACE("MetricManager") << "There are " << temp_list.size() << " Metric Calls to process (missed " << missed << ")" << TLOG_ENDL;
303  }
304 
305  while (temp_list.size() > 0)
306  {
307  auto data_ = std::move(temp_list.front());
308  temp_list.pop();
309  if (data_->Type == MetricType::InvalidMetric) continue;
310  if (!data_->UseNameOverride)
311  {
312  if (data_->MetricPrefix.size() > 0)
313  {
314  data_->Name = prefix_ + "." + data_->MetricPrefix + "." + data_->Name;
315  }
316  else
317  {
318  data_->Name = prefix_ + "." + data_->Name;
319  }
320  }
321 
322  for (auto& metric : metric_plugins_)
323  {
324  if (metric->getRunLevel() >= data_->Level)
325  {
326  try
327  {
328  metric->addMetricData(*data_);
329  last_send_time = std::chrono::steady_clock::now();
330  }
331  catch (...)
332  {
333  TLOG_ERROR("MetricManager") <<
334  "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
335  << metric->getLibName() << TLOG_ENDL;
336  }
337  }
338  }
339  }
340  }
341 
342  for (auto& metric : metric_plugins_)
343  {
344  try
345  {
346  metric->stopMetrics();
347  TLOG_DEBUG("MetricManager") << "Metric Plugin " << metric->getLibName() << " stopped." << TLOG_ENDL;
348  }
349  catch (...)
350  {
351  TLOG_ERROR("MetricManager") <<
352  "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
353  metric->getLibName() << TLOG_ENDL;
354  }
355  }
356  TLOG_DEBUG("MetricManager") << "MetricManager has been stopped." << TLOG_ENDL;
357 }
void reinitialize(fhicl::ParameterSet const &pset, std::string prefix="")
Reinitialize all MetricPlugin Instances.
void shutdown()
Call the destructors for all configured MetricPlugin instances.
void initialize(fhicl::ParameterSet const &pset, std::string prefix="")
Initialize the MetricPlugin instances.
void sendMetric(std::string const &name, std::string const &value, std::string const &unit, int level, MetricMode mode, std::string const &metricPrefix="", bool useNameOverride=false)
Send a metric with the given parameters to any MetricPlugins with a threshold level >= to level...
std::unique_ptr< MetricPlugin > makeMetricPlugin(std::string const &generator_plugin_spec, fhicl::ParameterSet const &ps)
Load a given MetricPlugin and return a pointer to it.
MetricMode
The Mode of the metric indicates how multiple metric values should be combined within a reporting int...
Definition: MetricData.hh:31
MetricManager()
Construct an instance of the MetricManager class.
void do_start()
Perform startup actions for each configured MetricPlugin.
void do_stop()
Stop sending metrics to the MetricPlugin instances.
virtual ~MetricManager() noexcept
MetricManager destructor.
Report the sum of all values. Use for counters to report accurate results.
Small structure used to hold a metric data point before sending to the metric plugins ...
Definition: MetricData.hh:41
Default, invalid value.
void do_resume()
Resume metric sending. Currently a No-Op.
void do_pause()
Pause metric sending. Currently a No-Op.