4 #include "SystemMetricCollector.hh"
5 #include "sys/sysinfo.h"
9 #define MLEVEL_PROCESS 6
12 #define MLEVEL_NETWORK 9
14 artdaq::SystemMetricCollector::SystemMetricCollector(
bool processMetrics,
bool systemMetrics)
15 : lastCPU_(), lastProcessCPUTimes_(), lastProcessCPUTime_(0), sendProcessMetrics_(processMetrics), sendSystemMetrics_(systemMetrics)
17 lastCPU_ = ReadProcStat_();
18 lastProcessCPUTime_ = times(&lastProcessCPUTimes_);
19 thisNetStat_ = ReadProcNetDev_();
20 lastNetStat_ = thisNetStat_;
23 double artdaq::SystemMetricCollector::GetSystemCPUUsagePercent()
25 auto thisCPU = ReadProcStat_();
26 auto totalUsage = thisCPU.totalUsage - lastCPU_.totalUsage;
27 auto total = thisCPU.total - lastCPU_.total;
29 return totalUsage * 100.0 /
static_cast<double>(total);
32 double artdaq::SystemMetricCollector::GetProcessCPUUsagePercent()
34 struct tms this_times;
35 auto now = times(&this_times);
41 auto delta_t = now - lastProcessCPUTime_;
42 auto utime = this_times.tms_utime - lastProcessCPUTimes_.tms_utime;
43 auto stime = this_times.tms_stime - lastProcessCPUTimes_.tms_stime;
45 lastProcessCPUTime_ = now;
46 lastProcessCPUTimes_ = this_times;
48 return utime + stime * 100.0 /
static_cast<double>(delta_t);
51 unsigned long artdaq::SystemMetricCollector::GetAvailableRAM()
53 struct sysinfo meminfo;
54 auto err = sysinfo(&meminfo);
57 return meminfo.freeram * meminfo.mem_unit;
62 unsigned long artdaq::SystemMetricCollector::GetBufferedRAM()
64 struct sysinfo meminfo;
65 auto err = sysinfo(&meminfo);
68 return meminfo.bufferram * meminfo.mem_unit;
73 unsigned long artdaq::SystemMetricCollector::GetTotalRAM()
75 struct sysinfo meminfo;
76 auto err = sysinfo(&meminfo);
79 return meminfo.totalram * meminfo.mem_unit;
84 double artdaq::SystemMetricCollector::GetAvailableRAMPercent(
bool buffers)
86 struct sysinfo meminfo;
87 auto err = sysinfo(&meminfo);
90 auto available = meminfo.freeram + (buffers ? meminfo.bufferram : 0);
91 return available * 100.0 /
static_cast<double>(meminfo.totalram);
96 unsigned long artdaq::SystemMetricCollector::GetProcessMemUsage()
98 auto filp = fopen(
"/proc/self/statm",
"r");
100 fscanf(filp,
"%*u %lu", &mem);
102 return mem * sysconf(_SC_PAGESIZE);
105 double artdaq::SystemMetricCollector::GetProcessMemUsagePercent()
107 auto proc = GetProcessMemUsage();
108 auto total = GetTotalRAM();
109 return proc * 100.0 /
static_cast<double>(total);
112 unsigned long artdaq::SystemMetricCollector::GetNetworkReceiveBytes()
115 return thisNetStat_.recv_bytes - lastNetStat_.recv_bytes;
118 unsigned long artdaq::SystemMetricCollector::GetNetworkSendBytes()
121 return thisNetStat_.send_bytes - lastNetStat_.send_bytes;
124 unsigned long artdaq::SystemMetricCollector::GetNetworkReceiveErrors()
127 return thisNetStat_.recv_errs - lastNetStat_.recv_errs;
130 unsigned long artdaq::SystemMetricCollector::GetNetworkSendErrors()
133 return thisNetStat_.send_errs - lastNetStat_.send_errs;
// Build the list of metric data points to publish.
//
// Process-level metrics are appended when sendProcessMetrics_ is set and
// system-level metrics when sendSystemMetrics_ is set. Every entry is a
// heap-allocated MetricData whose ownership is taken by a unique_ptr in the
// list. The time spent collecting is measured with steady_clock and streamed
// to a log expression at the end.
//
// NOTE(review): this listing does not show the braces around the two `if`
// bodies, the statement that begins the trailing `<<` log expression, the
// definitions of MLEVEL_CPU / MLEVEL_RAM, or the final return of `output` --
// confirm all of these against the complete file.
136 std::list<std::unique_ptr<artdaq::MetricData>> artdaq::SystemMetricCollector::SendMetrics()
// Start of the timing window reported in the log message below.
138 auto start_time = std::chrono::steady_clock::now();
139 std::list<std::unique_ptr<MetricData>> output;
// --- Per-process metrics (level MLEVEL_PROCESS) ---
140 if (sendProcessMetrics_)
// CPU usage of this process in percent, reported in Average mode.
142 output.emplace_back(
new MetricData(
"Process CPU Usage", GetProcessCPUUsagePercent(),
"%", MLEVEL_PROCESS,
MetricMode::Average,
"",
false));
// Memory usage of this process in bytes, reported in LastPoint mode.
143 output.emplace_back(
new MetricData(
"Process RAM Usage", GetProcessMemUsage(),
"B", MLEVEL_PROCESS,
MetricMode::LastPoint,
"",
false));
// --- System-wide metrics ---
145 if (sendSystemMetrics_)
// System CPU usage in percent, reported in Average mode.
147 output.emplace_back(
new MetricData(
"System CPU Usage", GetSystemCPUUsagePercent(),
"%", MLEVEL_CPU,
MetricMode::Average,
"",
false));
// Free RAM in bytes, reported in LastPoint mode.
149 output.emplace_back(
new MetricData(
"Free RAM", GetAvailableRAM(),
"B", MLEVEL_RAM,
MetricMode::LastPoint,
"",
false));
// Total RAM in bytes, reported in LastPoint mode.
150 output.emplace_back(
new MetricData(
"Total RAM", GetTotalRAM(),
"B", MLEVEL_RAM,
MetricMode::LastPoint,
"",
false));
// Available RAM in percent; `true` asks for buffer memory to be counted as available.
151 output.emplace_back(
new MetricData(
"Available RAM", GetAvailableRAMPercent(
true),
"%", MLEVEL_RAM,
MetricMode::LastPoint,
"",
false));
// Bytes received between the two stored network snapshots, reported in Rate mode.
153 output.emplace_back(
new MetricData(
"Network Receive Rate", GetNetworkReceiveBytes(),
"B", MLEVEL_NETWORK,
MetricMode::Rate,
"",
false));
// Bytes sent between the two stored network snapshots, reported in Rate mode.
154 output.emplace_back(
new MetricData(
"Network Send Rate", GetNetworkSendBytes(),
"B", MLEVEL_NETWORK,
MetricMode::Rate,
"",
false));
// Send errors between the two stored network snapshots, reported in Accumulate mode.
155 output.emplace_back(
new MetricData(
"Network Send Errors", GetNetworkSendErrors(),
"Errors", MLEVEL_NETWORK,
MetricMode::Accumulate,
"",
false));
// Receive errors between the two stored network snapshots, reported in Accumulate mode.
156 output.emplace_back(
new MetricData(
"Network Receive Errors", GetNetworkReceiveErrors(),
"Errors", MLEVEL_NETWORK,
MetricMode::Accumulate,
"",
false));
// Log how long collection took; the count streamed below is in microseconds.
160 <<
"Time to collect system metrics: "
161 << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - start_time).count()
166 artdaq::SystemMetricCollector::cpustat artdaq::SystemMetricCollector::ReadProcStat_()
168 auto filp = fopen(
"/proc/stat",
"r");
171 fscanf(filp,
"cpu %llu %llu %llu %llu %llu %llu %llu", &this_cpu.user, &this_cpu.nice, &this_cpu.system,
172 &this_cpu.idle, &this_cpu.iowait, &this_cpu.irq, &this_cpu.softirq);
175 this_cpu.totalUsage =
176 this_cpu.user + this_cpu.nice + this_cpu.system + this_cpu.iowait + this_cpu.irq + this_cpu.softirq;
177 this_cpu.total = this_cpu.totalUsage + this_cpu.idle;
182 artdaq::SystemMetricCollector::netstat artdaq::SystemMetricCollector::ReadProcNetDev_()
184 auto filp = fopen(
"/proc/net/dev",
"r");
185 char buf[200], ifname[20];
187 auto start_time = std::chrono::steady_clock::now();
190 for (
int i = 0; i < 2; i++)
192 fgets(buf, 200, filp);
195 unsigned long rbytes, rerrs, rdrop, rfifo, rframe, tbytes, terrs, tdrop, tfifo, tcolls, tcarrier;
197 while (fgets(buf, 200, filp))
199 sscanf(buf,
"%[^:]: %lu %*u %lu %lu %lu %lu %*u %*u %lu %*u %lu %lu %lu %lu %lu", ifname, &rbytes, &rerrs,
200 &rdrop, &rfifo, &rframe, &tbytes, &terrs, &tdrop, &tfifo, &tcolls, &tcarrier);
202 if (ifname[0] ==
'e')
204 auto total_rerrs = rerrs + rdrop + rfifo + rframe;
205 auto total_terrs = terrs + tdrop + tfifo + tcolls + tcarrier;
206 output.recv_bytes += rbytes;
207 output.send_bytes += tbytes;
208 output.send_errs += total_terrs;
209 output.recv_errs += total_rerrs;
212 output.collectionTime = start_time;
// Rotate the stored network snapshots: the current snapshot becomes the
// previous one and a fresh read of ReadProcNetDev_() becomes the current one.
// The rotation is conditional on the time elapsed since the current snapshot
// was collected.
// NOTE(review): the braces and the remainder of the `if` condition (what the
// elapsed time is compared against) are not visible in this listing --
// confirm the threshold against the complete file.
218 void artdaq::SystemMetricCollector::UpdateNetstat_()
220 auto start_time = std::chrono::steady_clock::now();
// Elapsed time since thisNetStat_ was collected, converted to seconds held in a double.
222 if (std::chrono::duration_cast<std::chrono::duration<
double, std::ratio<1>>>(start_time - thisNetStat_.collectionTime)
// Take the new reading first, then shift current -> last and store the new reading as current.
225 auto output = ReadProcNetDev_();
226 lastNetStat_ = thisNetStat_;
227 thisNetStat_ = output;
Report the sum of all values. Use for counters to report accurate results.
Report only the last value recorded. Useful for event counters, run numbers, etc. ...
Report the average of all values. Use for rates to report accurate results.