artdaq_utilities  v1_06_03
MetricPlugin.hh
1 // MetricPlugin.hh: Metric Plugin Interface
2 // Author: Eric Flumerfelt
3 // Last Modified: 11/05/2014 (Created)
4 //
5 // Defines the interface that any ARTDAQ metric plugin must implement
6 
7 #ifndef __METRIC_INTERFACE__
8 #define __METRIC_INTERFACE__
9 
10 #ifdef TRACE_NAME
11 #pragma push_macro("TRACE_NAME")
12 #undef TRACE_NAME
13 #define TRACE_NAME "MetricPlugin" /* a simple const char * */
14 #define TRACE_NAME_POP 1
15 #endif
16 #include "TRACE/trace.h" // TLOG(x,name)
17 
18 #include <chrono>
19 #include <string>
20 #include <unordered_map>
21 #include <bitset>
22 #include "fhiclcpp/ParameterSet.h"
23 #include "fhiclcpp/types/Atom.h"
24 #include "fhiclcpp/types/ConfigurationTable.h"
25 #include "fhiclcpp/types/Sequence.h"
26 
27 #include "artdaq-utilities/Plugins/MetricData.hh"
28 #include "cetlib/compiler_macros.h"
29 #ifndef FALLTHROUGH
30 #define FALLTHROUGH while (0)
31 #endif
32 
33 namespace artdaq {
39 {
40 public:
44  struct Config
45  {
47  fhicl::Atom<std::string> metricPluginType{fhicl::Name{"metricPluginType"}, fhicl::Comment{"The name of the metric plugin to load (may have additional configuration parameters"}};
49  fhicl::Atom<size_t> level{fhicl::Name{"level"}, fhicl::Comment{"The verbosity level threshold for this plugin. sendMetric calls with verbosity level greater than this will not be sent to the plugin. OPTIONAL"}, 0};
51  fhicl::Sequence<size_t> metric_levels{fhicl::Name{"metric_levels"}, fhicl::Comment{"A list of levels that should be enabled for this plugin. OPTIONAL"}, std::vector<size_t>()};
53  fhicl::Atom<std::string> level_string{fhicl::Name{"level_string"}, fhicl::Comment{"A string containing a comma-separated list of levels to enable. Ranges are supported. Example: \"1,2,4-10,11\" OPTIONAL"}, ""};
55  fhicl::Atom<double> reporting_interval{fhicl::Name{"reporting_interval"}, fhicl::Comment{"How often recorded metrics are sent to the underlying metric storage"}, 15.0};
57  fhicl::Atom<bool> send_zeros{fhicl::Name{"send_zeros"}, fhicl::Comment{"Whether zeros should be sent to the metric back-end when metrics are not reported in an interval and during shutdown"}, true};
58  };
/// Used for ParameterSet validation (if desired): wraps Config in a fhicl::WrappedTable.
60  using Parameters = fhicl::WrappedTable<Config>;
61 
70  explicit MetricPlugin(fhicl::ParameterSet const& ps, std::string const& app_name)
71  : pset(ps)
72  , app_name_(app_name)
73  , inhibit_(false)
74  , level_mask_(0ULL)
75  , sendZeros_(pset.get<bool>("send_zeros", true))
76  {
77  if (pset.has_key("level"))
78  {
79  for (size_t ii = 0; ii <= pset.get<size_t>("level"); ++ii)
80  {
81  level_mask_[ii] = true;
82  }
83  }
84  if (pset.has_key("metric_levels"))
85  {
86  auto levels = pset.get<std::vector<size_t>>("metric_levels");
87  for (auto& l : levels)
88  {
89  level_mask_[l] = true;
90  }
91  }
92  if (pset.has_key("level_string"))
93  {
94  auto string = pset.get<std::string>("level_string");
95  std::stringstream ss(string);
96  std::string token;
97  while (std::getline(ss, token, ','))
98  {
99  auto it = token.find("-");
100  if (it == 0 || it == token.size() - 1) continue;
101 
102  if (it != std::string::npos)
103  {
104  auto minStr = token.substr(0, it);
105  auto maxStr = token.substr(it + 1);
106  auto min = std::stoi(minStr);
107  auto max = std::stoi(maxStr);
108 
109  if (min > max) std::swap(min, max);
110  if (min > 63) min = 63;
111  if (max > 63) max = 63;
112 
113  for (int ii = min; ii <= max; ++ii)
114  {
115  level_mask_[ii] = true;
116  }
117  }
118  else
119  {
120  auto level = std::stoi(token);
121  if (level >= 0 && level < 63) level_mask_[level] = true;
122  }
123  }
124  }
125  if (level_mask_.to_ullong() == 0)
126  {
127  throw cet::exception("Configuration Error") // NOLINT(cert-err60-cpp)
128  << "No levels were enabled for this plugin! Please specify at least one of the following Parameters: \"level\", \"metric_levels\", or \"level_string\"!";
129  }
130  accumulationTime_ = pset.get<double>("reporting_interval", 15.0);
131  }
132 
/// Default virtual destructor, so that plugin instances can be destroyed through a MetricPlugin pointer.
136  virtual ~MetricPlugin() = default;
137 
139  //
140  // Interface Functions: These should be reimplemented in plugin classes!
141  //
143 
/// Return the name of the current MetricPlugin instance.
/// This base implementation returns the placeholder "ERROR"; plugin classes are expected to override it.
147  virtual std::string getLibName() const { return "ERROR"; }
148 
149 protected:
/// Send a metric to the underlying metric storage (file, Graphite, Ganglia, etc.) -- std::string value.
/// \param name Name of the metric (sendMetrics may append a suffix such as " - Rate")
/// \param value Value to report
/// \param unit Units of the metric
/// \param interval_end System-clock timestamp passed along with the value
159  virtual void sendMetric_(const std::string& name, const std::string& value, const std::string& unit, const std::chrono::system_clock::time_point& interval_end) = 0;
160 
/// Send a metric to the underlying metric storage -- int value. Parameters as for the std::string overload.
170  virtual void sendMetric_(const std::string& name, const int& value, const std::string& unit, const std::chrono::system_clock::time_point& interval_end) = 0;
171 
/// Send a metric to the underlying metric storage -- double value. Parameters as for the std::string overload.
181  virtual void sendMetric_(const std::string& name, const double& value, const std::string& unit, const std::chrono::system_clock::time_point& interval_end) = 0;
182 
/// Send a metric to the underlying metric storage -- float value. Parameters as for the std::string overload.
192  virtual void sendMetric_(const std::string& name, const float& value, const std::string& unit, const std::chrono::system_clock::time_point& interval_end) = 0;
193 
/// Send a metric to the underlying metric storage -- uint64_t value. Parameters as for the std::string overload.
203  virtual void sendMetric_(const std::string& name, const uint64_t& value, const std::string& unit, const std::chrono::system_clock::time_point& interval_end) = 0;
204 
/// Perform any start-up actions necessary for the metric plugin. Must be implemented by plugin classes.
210  virtual void startMetrics_() = 0;
211 
/// Perform any shutdown actions necessary for the metric plugin. Called by stopMetrics() after the final values and zeros are sent.
217  virtual void stopMetrics_() = 0;
218 
220  //
221  // Implementation Functions: These should be called from ARTDAQ code!
222  //
224 public:
229  void addMetricData(std::unique_ptr<MetricData> const& data)
230  {
231  TLOG(22) << "Adding metric data for name " << data->Name;
232  if (data->Type == MetricType::StringMetric)
233  {
234  sendMetric_(data->Name, data->StringValue, data->Unit, std::chrono::system_clock::now());
235  }
236  else
237  {
238  if (metricRegistry_.count(data->Name) == 0)
239  {
240  metricRegistry_[data->Name] = *data;
241  }
242  metricData_[data->Name].push_back(*data);
243  TLOG(22) << "Current list size: " << metricData_[data->Name].size();
244  //sendMetrics();
245  }
246  }
247 
256  void sendMetrics(bool forceSend = false,
257  std::chrono::steady_clock::time_point interval_end = std::chrono::steady_clock::now())
258  {
259  TLOG(23) << "sendMetrics called" << std::endl;
260  for (auto& metric : metricData_)
261  {
262  if (readyToSend_(metric.first) || forceSend)
263  {
264  TLOG(24) << "Sending metric " << metric.first;
265  if (metric.second.empty() && metricRegistry_.count(metric.first))
266  {
267  TLOG(24) << "Sending zero";
268  sendZero_(metricRegistry_[metric.first]);
269  }
270  else if (!metric.second.empty())
271  {
272  TLOG(24) << "Aggregating " << metric.second.size() << " MetricData points";
273 
274  if ((metric.second.front().Mode & MetricMode::Persist) != MetricMode::None && metric.second.size() > 1)
275  {
276  TLOG(24) << "Metric is in Persist mode and multiple instances are present. Removing the first entry.";
277  metric.second.erase(metric.second.begin());
278  }
279 
280  MetricData& data = metric.second.front();
281 
282  auto it = ++(metric.second.begin());
283  while (it != metric.second.end())
284  {
285  data.Add(*it);
286  it = metric.second.erase(it);
287  }
288 
289  std::bitset<32> modeSet(static_cast<uint32_t>(data.Mode));
290  bool useSuffix = true;
291  if (modeSet.count() <= 1 || (modeSet.count() <= 2 && (data.Mode & MetricMode::Persist) != MetricMode::None)) useSuffix = false;
292 
293  if ((data.Mode & MetricMode::LastPoint) != MetricMode::None)
294  {
295  sendMetric_(data.Name + (useSuffix ? " - Last" : ""), data.Last, data.Unit, data.Type, to_system_clock(lastSendTime_[data.Name]));
296  }
297  if ((data.Mode & MetricMode::Accumulate) != MetricMode::None)
298  {
299  sendMetric_(data.Name + (useSuffix ? " - Total" : ""), data.Value, data.Unit, data.Type, to_system_clock(lastSendTime_[data.Name]));
300  }
301  if ((data.Mode & MetricMode::Average) != MetricMode::None)
302  {
303  double average = 0.0;
304  switch (data.Type)
305  {
307  average = data.Value.d / static_cast<double>(data.DataPointCount);
308  break;
310  average = data.Value.f / static_cast<double>(data.DataPointCount);
311  break;
313  average = data.Value.i / static_cast<double>(data.DataPointCount);
314  break;
316  average = data.Value.u / static_cast<double>(data.DataPointCount);
317  break;
318  default:
319  break;
320  }
321  sendMetric_(data.Name + (useSuffix ? " - Average" : ""), average, data.Unit, to_system_clock(lastSendTime_[data.Name]));
322  }
323  if ((data.Mode & MetricMode::Rate) != MetricMode::None)
324  {
325  double duration = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(
326  interval_end - interval_start_[metric.first])
327  .count();
328  double rate = 0.0;
329  switch (data.Type)
330  {
332  rate = data.Value.d / duration;
333  break;
335  rate = data.Value.f / duration;
336  break;
338  rate = data.Value.i / duration;
339  break;
341  rate = data.Value.u / duration;
342  break;
343  default:
344  break;
345  }
346  sendMetric_(data.Name + (useSuffix ? " - Rate" : ""), rate, data.Unit + "/s", to_system_clock(lastSendTime_[data.Name]));
347  }
348  if ((data.Mode & MetricMode::Minimum) != MetricMode::None)
349  {
350  sendMetric_(data.Name + (useSuffix ? " - Min" : ""), data.Min, data.Unit, data.Type, to_system_clock(lastSendTime_[data.Name]));
351  }
352  if ((data.Mode & MetricMode::Maximum) != MetricMode::None)
353  {
354  sendMetric_(data.Name + (useSuffix ? " - Max" : ""), data.Max, data.Unit, data.Type, to_system_clock(lastSendTime_[data.Name]));
355  }
356 
357  if ((data.Mode & MetricMode::Persist) == MetricMode::None)
358  {
359  TLOG(24) << "Clearing metric data list sz=" << metric.second.size();
360  metric.second.clear();
361  TLOG(24) << "Cleared metric data list sz=" << metricData_[metric.first].size();
362  }
363  else
364  {
365  TLOG(24) << "Metric is Persisted, leaving " << metricData_[metric.first].size() << " entries (should be 1)";
366  }
367  }
368  interval_start_[metric.first] = interval_end;
369  }
370  }
371  TLOG(23) << "sendMetrics done" << std::endl;
372  }
373 
378 
383  void stopMetrics()
384  {
385  inhibit_ = true;
386  sendMetrics(true);
387  for (auto const& metric : metricRegistry_)
388  {
389  sendZero_(metric.second);
390  }
391  stopMetrics_();
392  inhibit_ = false;
393  }
394 
401  {
402  if (level > 63) level = 63;
403  if (level < 0) return true;
404  return level_mask_[level];
405  }
406 
412  {
413  for (auto& metric : metricData_)
414  {
415  if (!metric.second.empty())
416  {
417  TLOG(TLVL_TRACE) << "Metric " << metric.first << " has " << metric.second.size() << " pending MetricData instances" << std::endl;
418  return true;
419  }
420  }
421 
422  return false;
423  }
424 
425 protected:
426  fhicl::ParameterSet pset;
428  std::string app_name_;
429  bool inhibit_;
430  std::bitset<64> level_mask_;
431  bool sendZeros_;
432 
433 private:
// MetricPlugin instances cannot be copied or moved: all four operations are deleted.
434  MetricPlugin(const MetricPlugin&) = delete;
435  MetricPlugin(MetricPlugin&&) = delete;
436  MetricPlugin& operator=(const MetricPlugin&) = delete;
437  MetricPlugin& operator=(MetricPlugin&&) = delete;
438 
// Pending data points per metric name; filled by addMetricData, aggregated and cleared by sendMetrics.
439  std::unordered_map<std::string, std::list<MetricData>> metricData_;
// First MetricData seen for each metric name; used as the template when zeros are sent.
440  std::unordered_map<std::string, MetricData> metricRegistry_;
// Steady-clock time at which readyToSend_ last returned true for each metric name.
441  std::unordered_map<std::string, std::chrono::steady_clock::time_point> lastSendTime_;
// interval_end of the previous sendMetrics pass for each metric name; start of the interval used for rates.
442  std::unordered_map<std::string, std::chrono::steady_clock::time_point> interval_start_;
443 
/// \brief Translate a steady_clock time point into the equivalent system_clock time point
/// \param t Steady-clock time point to convert
/// \return The current system time shifted by the distance between t and the current steady time
std::chrono::system_clock::time_point to_system_clock(std::chrono::steady_clock::time_point const& t)
{
	const auto shifted = std::chrono::system_clock::now() + (t - std::chrono::steady_clock::now());
	const auto since_epoch = std::chrono::duration_cast<std::chrono::system_clock::duration>(shifted.time_since_epoch());
	return std::chrono::system_clock::time_point(since_epoch);
}
449 
450  bool readyToSend_(std::string const& name)
451  {
452  auto now = std::chrono::steady_clock::now();
453  if (std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(now - lastSendTime_[name]).count() >= accumulationTime_)
454  {
455  lastSendTime_[name] = now;
456  return true;
457  }
458 
459  return false;
460  }
461 
462  void sendZero_(MetricData data)
463  {
464  if (sendZeros_)
465  {
466  std::bitset<32> modeSet(static_cast<uint32_t>(data.Mode));
467  bool useSuffix = true;
468  if (modeSet.count() <= 1) useSuffix = false;
469 
470  MetricData::MetricDataValue zero;
471  switch (data.Type)
472  {
474  zero.d = 0.0;
475  break;
477  zero.f = 0.0f;
478  break;
480  zero.i = 0;
481  break;
483  zero.u = 0;
484  break;
485  default:
486  break;
487  }
488 
489  if ((data.Mode & MetricMode::LastPoint) != MetricMode::None)
490  {
491  sendMetric_(data.Name + (useSuffix ? " - Last" : ""), zero, data.Unit, data.Type, std::chrono::system_clock::now());
492  }
493  if ((data.Mode & MetricMode::Accumulate) != MetricMode::None)
494  {
495  sendMetric_(data.Name + (useSuffix ? " - Total" : ""), zero, data.Unit, data.Type, std::chrono::system_clock::now());
496  }
497  if ((data.Mode & MetricMode::Average) != MetricMode::None)
498  {
499  sendMetric_(data.Name + (useSuffix ? " - Average" : ""), 0.0, data.Unit, std::chrono::system_clock::now());
500  }
501  if ((data.Mode & MetricMode::Rate) != MetricMode::None)
502  {
503  sendMetric_(data.Name + (useSuffix ? " - Rate" : ""), 0.0, data.Unit + "/s", std::chrono::system_clock::now());
504  }
505  if ((data.Mode & MetricMode::Minimum) != MetricMode::None)
506  {
507  sendMetric_(data.Name + (useSuffix ? " - Min" : ""), zero, data.Unit, data.Type, std::chrono::system_clock::now());
508  }
509  if ((data.Mode & MetricMode::Maximum) != MetricMode::None)
510  {
511  sendMetric_(data.Name + (useSuffix ? " - Max" : ""), zero, data.Unit, data.Type, std::chrono::system_clock::now());
512  }
513  }
514  }
515 
516  void sendMetric_(std::string const& name, MetricData::MetricDataValue data, std::string const& unit, MetricType type, std::chrono::system_clock::time_point const& interval_end)
517  {
518  switch (type)
519  {
521  sendMetric_(name, data.d, unit, interval_end);
522  break;
524  sendMetric_(name, data.f, unit, interval_end);
525  break;
527  sendMetric_(name, data.i, unit, interval_end);
528  break;
530  sendMetric_(name, data.u, unit, interval_end);
531  break;
532  default:
533  break;
534  }
535  }
536 };
537 } //End namespace artdaq
538 
539 #ifdef TRACE_NAME_POP
540 #pragma pop_macro("TRACE_NAME")
541 #undef TRACE_NAME_POP
542 #endif
543 #endif //End ifndef __METRIC_INTERFACE__
bool IsLevelEnabled(int level)
Determine if the given level is enabled for this MetricPlugin instance.
virtual void startMetrics_()=0
Perform any start-up actions necessary for the metric plugin.
The MetricPlugin class defines the interface that MetricManager uses to send metric data to the vario...
Definition: MetricPlugin.hh:38
void startMetrics()
Perform startup actions. Simply calls the virtual startMetrics_ function.
size_t DataPointCount
Number of data points accumulated in this MetricData
Definition: MetricData.hh:178
Report the sum of all values. Use for counters to report accurate results.
fhicl::ParameterSet pset
The ParameterSet used to configure the MetricPlugin.
fhicl::Atom< std::string > metricPluginType
The name of the metric plugin to load (may have additional configuration parameters).
Definition: MetricPlugin.hh:47
std::string Unit
Units of the metric
Definition: MetricData.hh:158
float f
Value of the metric, if it is a MetricType::FloatMetric.
Definition: MetricData.hh:109
Metric is a std::string (not in union)
std::string Name
Name of the metric
Definition: MetricData.hh:96
fhicl::Atom< double > reporting_interval
"reporting_interval" (Default: 15.0): The interval, in seconds, which the metric plugin will accumula...
Definition: MetricPlugin.hh:55
MetricMode Mode
Accumulation mode of the metric
Definition: MetricData.hh:166
bool Add(MetricData other)
Add two MetricData instances together
Definition: MetricData.hh:261
MetricType
This enumeration is used to identify the type of the metric instance (which value should be extracted ...
Definition: MetricData.hh:16
MetricDataValue Max
Maximum recorded value of this MetricData.
Definition: MetricData.hh:149
void stopMetrics()
Perform shutdown actions. Zeroes out all accumulators, and sends zeros for each metric. Calls stopMetrics_() for any plugin-defined shutdown actions.
Reports the minimum value recorded.
std::string app_name_
Name of the application which is sending metrics to this plugin.
Metric is a long unsigned int.
void sendMetrics(bool forceSend=false, std::chrono::steady_clock::time_point interval_end=std::chrono::steady_clock::now())
For each known metric, determine whether the reporting interval has elapsed, and if so...
over. Use to create rates from counters.
MetricPlugin(fhicl::ParameterSet const &ps, std::string const &app_name)
MetricPlugin Constructor.
Definition: MetricPlugin.hh:70
Report only the last value recorded. Useful for event counters, run numbers, etc. ...
Reports the maximum value recorded.
std::bitset< 64 > level_mask_
Bitset indicating for each possible metric level, whether this plugin will receive those metrics...
bool metricsPending()
Determine if metrics are waiting to be sent.
virtual std::string getLibName() const
Return the name of the current MetricPlugin instance.
virtual void sendMetric_(const std::string &name, const std::string &value, const std::string &unit, const std::chrono::system_clock::time_point &interval_end)=0
Send a metric to the underlying metric storage (file, Graphite, Ganglia, etc.)
MetricDataValue Value
Accumulated value of this MetricData
Definition: MetricData.hh:146
virtual void stopMetrics_()=0
Perform any shutdown actions necessary for the metric plugin.
int i
Value of the metric, if it is a MetricType::IntMetric.
Definition: MetricData.hh:107
Keep previous metric value in memory.
MetricType Type
Type of the metric
Definition: MetricData.hh:154
fhicl::Atom< std::string > level_string
"level_string" (OPTIONAL): A string containing a comma-separated list of levels to enable...
Definition: MetricPlugin.hh:53
fhicl::Atom< bool > send_zeros
"send_zeros" (Default: true): Whether zeros should be sent to the metric back-end when metrics are no...
Definition: MetricPlugin.hh:57
fhicl::WrappedTable< Config > Parameters
Used for ParameterSet validation (if desired)
Definition: MetricPlugin.hh:60
MetricDataValue Last
Last value of this MetricData.
Definition: MetricData.hh:147
fhicl::Atom< size_t > level
"level" (OPTIONAL): The verbosity level threshold for this plugin. sendMetric calls with verbosity le...
Definition: MetricPlugin.hh:49
double accumulationTime_
The amount of time to average metric values; except for accumulate=false metrics, will be the interva...
Small structure used to hold a metric data point before sending to the metric plugins ...
Definition: MetricData.hh:65
The Config struct defines the accepted configuration parameters for this class.
Definition: MetricPlugin.hh:44
uint64_t u
Value of the metric, if it is a MetricType::UnsignedMetric.
Definition: MetricData.hh:110
Report the average of all values. Use for rates to report accurate results.
MetricDataValue Min
Minimum recorded value of this MetricData.
Definition: MetricData.hh:148
fhicl::Sequence< size_t > metric_levels
"metric_levels" (OPTIONAL): A list of levels that should be enabled for this plugin.
Definition: MetricPlugin.hh:51
void addMetricData(std::unique_ptr< MetricData > const &data)
Send a metric value to the MetricPlugin.
bool sendZeros_
Whether zeros should be sent to this metric backend when metric instances are missing or at the end o...
virtual ~MetricPlugin()=default
Default virtual Destructor.
double d
Value of the metric, if it is a MetricType::DoubleMetric.
Definition: MetricData.hh:108
bool inhibit_
Flag to indicate that the MetricPlugin is being stopped, and any metric back-ends which do not have a...