00001
00002
00003
00004
00005
00006
00007
00008
00009 #define TRACE_NAME "MetricManager"
00010 #include "tracemf.h"
00011 #include "artdaq-utilities/Plugins/MetricManager.hh"
00012 #include "artdaq-utilities/Plugins/makeMetricPlugin.hh"
00013 #include "fhiclcpp/ParameterSet.h"
00014
00015 #include <chrono>
00016 #include <boost/exception/all.hpp>
00017
00018 artdaq::MetricManager::
00019 MetricManager() : metric_plugins_(0)
00020 , initialized_(false)
00021 , running_(false)
00022 , active_(false)
00023 , missed_metric_calls_(0)
00024 , metric_queue_max_size_(10000)
00025 {}
00026
00027 artdaq::MetricManager::~MetricManager()
00028 {
00029 shutdown();
00030 }
00031
00032 void artdaq::MetricManager::initialize(fhicl::ParameterSet const& pset, std::string prefix)
00033 {
00034 prefix_ = prefix;
00035 if (initialized_)
00036 {
00037 shutdown();
00038 }
00039 TLOG_INFO("MetricManager") << "Configuring metrics with parameter set:\n" << pset.to_string() << TLOG_ENDL;
00040
00041 std::vector<std::string> names = pset.get_pset_names();
00042
00043 for (auto name : names)
00044 {
00045 if (name == "metric_queue_size")
00046 {
00047 metric_queue_max_size_ = pset.get<size_t>("metric_queue_size");
00048 }
00049 else
00050 {
00051 try
00052 {
00053 TLOG_DEBUG("MetricManager") << "Constructing metric plugin with name " << name << TLOG_ENDL;
00054 fhicl::ParameterSet plugin_pset = pset.get<fhicl::ParameterSet>(name);
00055 metric_plugins_.push_back(makeMetricPlugin(
00056 plugin_pset.get<std::string>("metricPluginType", ""), plugin_pset));
00057 }
00058 catch (const cet::exception& e)
00059 {
00060 TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
00061 ", cet::exception object caught:" << e.explain_self() << TLOG_ENDL;
00062 }
00063 catch (const boost::exception& e)
00064 {
00065 TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
00066 ", boost::exception object caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
00067 }
00068 catch (const std::exception& e)
00069 {
00070 TLOG_ERROR("MetricManager") << "Exception caught in MetricManager::initialize, error loading plugin with name " << name <<
00071 ", std::exception caught: " << e.what() << TLOG_ENDL;
00072 }
00073 catch (...)
00074 {
00075 TLOG_ERROR("MetricManager") << "Unknown Exception caught in MetricManager::initialize, error loading plugin with name " << name << TLOG_ENDL;
00076 }
00077 }
00078 }
00079
00080 initialized_ = true;
00081 }
00082
00083 void artdaq::MetricManager::do_start()
00084 {
00085 if (!running_)
00086 {
00087 TLOG_DEBUG("MetricManager") << "Starting MetricManager" << TLOG_ENDL;
00088 for (auto& metric : metric_plugins_)
00089 {
00090 try
00091 {
00092 metric->startMetrics();
00093 TLOG_INFO("MetricManager") << "Metric Plugin " << metric->getLibName() << " started." << TLOG_ENDL;
00094 active_ = true;
00095 }
00096 catch (...)
00097 {
00098 TLOG_ERROR("MetricManager") <<
00099 "Exception caught in MetricManager::do_start(), error starting plugin with name " <<
00100 metric->getLibName() << TLOG_ENDL;
00101 }
00102 }
00103 running_ = true;
00104 startMetricLoop_();
00105 }
00106 }
00107
00108 void artdaq::MetricManager::do_stop()
00109 {
00110 TLOG_DEBUG("MetricManager") << "Stopping Metrics" << TLOG_ENDL;
00111 running_ = false;
00112 metric_cv_.notify_all();
00113 TLOG_DEBUG("MetricManager") << "Joining Metric-Sending thread" << TLOG_ENDL;
00114 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
00115 TLOG_DEBUG("MetricManager") << "do_stop Complete" << TLOG_ENDL;
00116 }
00117
00118 void artdaq::MetricManager::do_pause() { }
00119 void artdaq::MetricManager::do_resume() { }
00120
00121 void artdaq::MetricManager::reinitialize(fhicl::ParameterSet const& pset, std::string prefix)
00122 {
00123 shutdown();
00124 initialize(pset, prefix);
00125 }
00126
00127 void artdaq::MetricManager::shutdown()
00128 {
00129 TLOG_DEBUG("MetricManager") << "MetricManager is shutting down..." << TLOG_ENDL;
00130 do_stop();
00131
00132 if (initialized_)
00133 {
00134 for (auto& i : metric_plugins_)
00135 {
00136 try
00137 {
00138 std::string name = i->getLibName();
00139 i.reset(nullptr);
00140 TLOG_DEBUG("MetricManager") << "Metric Plugin " << name << " shutdown." << TLOG_ENDL;
00141 }
00142 catch (...)
00143 {
00144 TLOG_ERROR("MetricManager") <<
00145 "Exception caught in MetricManager::shutdown(), error shutting down metric with name " <<
00146 i->getLibName() << TLOG_ENDL;
00147 }
00148 }
00149 initialized_ = false;
00150 }
00151 }
00152
00153 void artdaq::MetricManager::sendMetric(std::string const& name, std::string const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
00154 {
00155 if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
00156 else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
00157 else if (active_)
00158 {
00159 if (metric_queue_.size() < metric_queue_max_size_)
00160 {
00161 std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
00162 {
00163 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
00164 metric_queue_.emplace(std::move(metric));
00165 }
00166 }
00167 else
00168 {
00169 TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
00170 missed_metric_calls_++;
00171 }
00172 metric_cv_.notify_all();
00173 }
00174 }
00175
00176 void artdaq::MetricManager::sendMetric(std::string const& name, int const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
00177 {
00178 if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
00179 else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
00180 else if (active_)
00181 {
00182 if (metric_queue_.size() < metric_queue_max_size_)
00183 {
00184 std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
00185 {
00186 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
00187 metric_queue_.emplace(std::move(metric));
00188 }
00189 }
00190 else
00191 {
00192 TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
00193 missed_metric_calls_++;
00194 }
00195 metric_cv_.notify_all();
00196 }
00197 }
00198
00199 void artdaq::MetricManager::sendMetric(std::string const& name, double const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
00200 {
00201 if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
00202 else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
00203 else if (active_)
00204 {
00205 if (metric_queue_.size() < metric_queue_max_size_)
00206 {
00207 std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
00208 {
00209 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
00210 metric_queue_.emplace(std::move(metric));
00211 }
00212 }
00213 else
00214 {
00215 TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
00216 missed_metric_calls_++;
00217 }
00218 metric_cv_.notify_all();
00219 }
00220 }
00221
00222 void artdaq::MetricManager::sendMetric(std::string const& name, float const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
00223 {
00224 if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
00225 else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
00226 else if (active_)
00227 {
00228 if (metric_queue_.size() < metric_queue_max_size_)
00229 {
00230 std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
00231 {
00232 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
00233 metric_queue_.emplace(std::move(metric));
00234 }
00235 }
00236 else
00237 {
00238 TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
00239 missed_metric_calls_++;
00240 }
00241 metric_cv_.notify_all();
00242 }
00243 }
00244
00245 void artdaq::MetricManager::sendMetric(std::string const& name, long unsigned int const& value, std::string const& unit, int level, MetricMode mode, std::string const& metricPrefix, bool useNameOverride)
00246 {
00247 if (!initialized_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager has not yet been initialized!" << TLOG_ENDL; }
00248 else if (!running_) { TLOG_WARNING("MetricManager") << "Attempted to send metric when MetricManager stopped!" << TLOG_ENDL; }
00249 else if (active_)
00250 {
00251 if (metric_queue_.size() < metric_queue_max_size_)
00252 {
00253 std::unique_ptr<MetricData> metric(new MetricData(name, value, unit, level, mode, metricPrefix, useNameOverride));
00254 {
00255 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
00256 metric_queue_.emplace(std::move(metric));
00257 }
00258 }
00259 else
00260 {
00261 TLOG_ARB(10, "MetricManager") << "Rejecting metric because queue full" << TLOG_ENDL;
00262 missed_metric_calls_++;
00263 }
00264 metric_cv_.notify_all();
00265 }
00266 }
00267
00268 void artdaq::MetricManager::startMetricLoop_()
00269 {
00270 if (metric_sending_thread_.joinable()) metric_sending_thread_.join();
00271 TLOG_INFO("MetricManager") << "Starting Metric Sending Thread" << TLOG_ENDL;
00272 boost::thread::attributes attrs;
00273 attrs.set_stack_size(4096 * 200);
00274 metric_sending_thread_ = boost::thread(attrs, boost::bind(&MetricManager::sendMetricLoop_, this));
00275 }
00276
00277 void artdaq::MetricManager::sendMetricLoop_()
00278 {
00279 auto last_send_time = std::chrono::steady_clock::time_point();
00280 while (running_)
00281 {
00282 while (metric_queue_.empty() && running_)
00283 {
00284 std::unique_lock<std::mutex> lk(metric_mutex_);
00285 metric_cv_.wait_for(lk, std::chrono::milliseconds(100));
00286 auto now = std::chrono::steady_clock::now();
00287 if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_send_time).count() > metric_send_interval_ms_)
00288 {
00289 for (auto& metric : metric_plugins_) { metric->sendMetrics(); }
00290 last_send_time = now;
00291 }
00292 }
00293
00294 auto temp_list = std::queue<std::unique_ptr<MetricData>>();
00295 {
00296 std::unique_lock<std::mutex> lk(metric_queue_mutex_);
00297 temp_list.swap(metric_queue_);
00298 temp_list.emplace(new MetricData("Metric Calls", temp_list.size(), "metrics", 4, MetricMode::Accumulate, "", false));
00299 auto missed = missed_metric_calls_.exchange(0);
00300
00301 temp_list.emplace(new MetricData("Missed Metric Calls", missed, "metrics", 4, MetricMode::Accumulate, "", false));
00302 TLOG_TRACE("MetricManager") << "There are " << temp_list.size() << " Metric Calls to process (missed " << missed << ")" << TLOG_ENDL;
00303 }
00304
00305 while (temp_list.size() > 0)
00306 {
00307 auto data_ = std::move(temp_list.front());
00308 temp_list.pop();
00309 if (data_->Type == MetricType::InvalidMetric) continue;
00310 if (!data_->UseNameOverride)
00311 {
00312 if (data_->MetricPrefix.size() > 0)
00313 {
00314 data_->Name = prefix_ + "." + data_->MetricPrefix + "." + data_->Name;
00315 }
00316 else
00317 {
00318 data_->Name = prefix_ + "." + data_->Name;
00319 }
00320 }
00321
00322 for (auto& metric : metric_plugins_)
00323 {
00324 if (metric->getRunLevel() >= data_->Level)
00325 {
00326 try
00327 {
00328 metric->addMetricData(*data_);
00329 last_send_time = std::chrono::steady_clock::now();
00330 }
00331 catch (...)
00332 {
00333 TLOG_ERROR("MetricManager") <<
00334 "Error in MetricManager::sendMetric: error sending value to metric plugin with name "
00335 << metric->getLibName() << TLOG_ENDL;
00336 }
00337 }
00338 }
00339 }
00340 }
00341
00342 for (auto& metric : metric_plugins_)
00343 {
00344 try
00345 {
00346 metric->stopMetrics();
00347 TLOG_DEBUG("MetricManager") << "Metric Plugin " << metric->getLibName() << " stopped." << TLOG_ENDL;
00348 }
00349 catch (...)
00350 {
00351 TLOG_ERROR("MetricManager") <<
00352 "Exception caught in MetricManager::do_stop(), error stopping plugin with name " <<
00353 metric->getLibName() << TLOG_ENDL;
00354 }
00355 }
00356 TLOG_DEBUG("MetricManager") << "MetricManager has been stopped." << TLOG_ENDL;
00357 }