2 #define TRACE_NAME "SystemMetricCollector"
6 #include "SystemMetricCollector.hh"
7 #include "sys/sysinfo.h"
11 #define MLEVEL_PROCESS 6
14 #define MLEVEL_NETWORK 9
17 : cpuCount_(GetCPUCount_())
18 , nonIdleCPUPercent_(0)
20 , systemCPUPercent_(0)
22 , iowaitCPUPercent_(0)
25 , lastProcessCPUTimes_()
26 , lastProcessCPUTime_(0)
27 , sendProcessMetrics_(processMetrics)
28 , sendSystemMetrics_(systemMetrics)
30 lastCPU_ = ReadProcStat_();
31 lastProcessCPUTime_ = times(&lastProcessCPUTimes_);
32 thisNetStat_ = ReadProcNetDev_();
33 lastNetStat_ = thisNetStat_;
38 auto thisCPU = ReadProcStat_();
39 auto total =
static_cast<double>(thisCPU.total - lastCPU_.total);
43 nonIdleCPUPercent_ = 0;
45 systemCPUPercent_ = 0;
47 iowaitCPUPercent_ = 0;
52 nonIdleCPUPercent_ = (thisCPU.totalUsage - lastCPU_.totalUsage) * 100.0 * cpuCount_ / total;
53 userCPUPercent_ = (thisCPU.user + thisCPU.nice - lastCPU_.user - lastCPU_.nice) * 100.0 * cpuCount_ / total;
54 systemCPUPercent_ = (thisCPU.system - lastCPU_.system) * 100.0 * cpuCount_ / total;
55 idleCPUPercent_ = (thisCPU.idle - lastCPU_.idle) * 100.0 * cpuCount_ / total;
56 iowaitCPUPercent_ = (thisCPU.iowait - lastCPU_.iowait) * 100.0 * cpuCount_ / total;
57 irqCPUPercent_ = (thisCPU.irq + thisCPU.softirq - lastCPU_.irq - lastCPU_.softirq) * 100.0 * cpuCount_ / total;
64 struct tms this_times;
65 auto now = times(&this_times);
71 auto delta_t = now - lastProcessCPUTime_;
72 if (delta_t == 0)
return 0;
74 auto utime = this_times.tms_utime - lastProcessCPUTimes_.tms_utime;
75 auto stime = this_times.tms_stime - lastProcessCPUTimes_.tms_stime;
77 lastProcessCPUTime_ = now;
78 lastProcessCPUTimes_ = this_times;
80 return (utime + stime) * 100.0 /
static_cast<double>(delta_t);
85 struct sysinfo meminfo;
86 auto err = sysinfo(&meminfo);
89 return meminfo.freeram * meminfo.mem_unit;
96 struct sysinfo meminfo;
97 auto err = sysinfo(&meminfo);
100 return meminfo.bufferram * meminfo.mem_unit;
107 struct sysinfo meminfo;
108 auto err = sysinfo(&meminfo);
111 return meminfo.totalram * meminfo.mem_unit;
118 struct sysinfo meminfo;
119 auto err = sysinfo(&meminfo);
120 if (err == 0 && meminfo.totalram > 0)
122 auto available = meminfo.freeram + (buffers ? meminfo.bufferram : 0);
123 return available * 100.0 /
static_cast<double>(meminfo.totalram);
130 auto filp = fopen(
"/proc/self/statm",
"r");
132 fscanf(filp,
"%*u %lu", &mem);
134 return mem * sysconf(_SC_PAGESIZE);
139 auto proc = GetProcessMemUsage();
140 auto total = GetTotalRAM();
141 if (total == 0)
return 0;
142 return proc * 100.0 /
static_cast<double>(total);
148 return thisNetStat_.stats[ifname].recv_bytes - lastNetStat_.stats[ifname].recv_bytes;
154 return thisNetStat_.stats[ifname].send_bytes - lastNetStat_.stats[ifname].send_bytes;
160 return thisNetStat_.stats[ifname].recv_errs - lastNetStat_.stats[ifname].recv_errs;
165 auto filp = fopen(
"/proc/net/snmp",
"r");
167 char tcp_lbls[BFSZ_];
168 char tcp_data[BFSZ_];
169 char* bufptr = tcp_lbls;
172 #define TCP_LINE_TKN_ "Tcp:"
173 #define TCP_RETRANSSEGS_TKN_ "RetransSegs"
174 uint64_t retranssegs = 0;
175 while (fgets(bufptr, BFSZ_ - 1, filp) !=
nullptr)
176 if (strstr(bufptr, TCP_LINE_TKN_))
178 char *tokn_name, *tokn_data, *tokn_save, *data_save;
179 fgets(tcp_data, BFSZ_ - 1, filp);
180 tokn_name = strtok_r(tcp_lbls,
" ", &tokn_save);
181 tokn_data = strtok_r(tcp_data,
" ", &data_save);
182 while (tokn_name != NULL && strcmp(tokn_name, TCP_RETRANSSEGS_TKN_) != 0)
184 tokn_name = strtok_r(NULL,
" ", &tokn_save);
185 tokn_data = strtok_r(NULL,
" ", &data_save);
187 if (tokn_name) retranssegs = strtoull(tokn_data, 0, 0);
190 TRACE(TLVL_DEBUG + 10,
"retranssegs=%lu", retranssegs);
198 return thisNetStat_.stats[ifname].send_errs - lastNetStat_.stats[ifname].send_errs;
203 std::list<std::string> output;
204 for (
auto& i : thisNetStat_.stats)
206 output.push_back(i.first);
213 auto start_time = std::chrono::steady_clock::now();
214 std::list<std::unique_ptr<MetricData>> output;
215 if (sendProcessMetrics_)
220 if (sendSystemMetrics_)
234 for (
auto& ifname : GetNetworkInterfaceNames())
236 output.emplace_back(
new MetricData(ifname +
" Network Receive Rate", GetNetworkReceiveBytes(ifname),
"B", MLEVEL_NETWORK,
MetricMode::Rate,
"",
false));
237 output.emplace_back(
new MetricData(ifname +
" Network Send Rate", GetNetworkSendBytes(ifname),
"B", MLEVEL_NETWORK,
MetricMode::Rate,
"",
false));
238 output.emplace_back(
new MetricData(ifname +
" Network Send Errors", GetNetworkSendErrors(ifname),
"Errors", MLEVEL_NETWORK,
MetricMode::Accumulate,
"",
false));
239 output.emplace_back(
new MetricData(ifname +
" Network Receive Errors", GetNetworkReceiveErrors(ifname),
"Errors", MLEVEL_NETWORK,
MetricMode::Accumulate,
"",
false));
241 output.emplace_back(
new MetricData(
"Network TCP RetransSegs", GetNetworkTCPRetransSegs(),
"Segs", MLEVEL_NETWORK,
MetricMode::Rate,
"",
false));
244 TLOG(TLVL_DEBUG + 35)
245 <<
"Time to collect system metrics: "
246 << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - start_time).count()
// Reads the aggregate "cpu" line of /proc/stat into a cpustat (user, nice, system, idle, iowait,
// irq, softirq) and fills in the two derived fields, totalUsage and total.
251 artdaq::SystemMetricCollector::cpustat artdaq::SystemMetricCollector::ReadProcStat_()
// NOTE(review): neither the fopen result nor the fscanf return value is checked on the lines
// visible here -- confirm how a missing or malformed /proc/stat is handled.
253 auto filp = fopen(
"/proc/stat",
"r");
256 fscanf(filp,
"cpu %lu %lu %lu %lu %lu %lu %lu", &this_cpu.user, &this_cpu.nice, &this_cpu.system,
257 &this_cpu.idle, &this_cpu.iowait, &this_cpu.irq, &this_cpu.softirq);
// If iowait has gone backwards relative to the stored previous sample, rewrite that previous
// sample: its iowait becomes the new value and its total/totalUsage are lowered by the difference.
// Presumably this keeps later (this - last) deltas from going negative -- confirm intent.
261 if (this_cpu.iowait < lastCPU_.iowait)
263 auto diff = lastCPU_.iowait - this_cpu.iowait;
264 lastCPU_.iowait = this_cpu.iowait;
265 lastCPU_.total -= diff;
266 lastCPU_.totalUsage -= diff;
// totalUsage is every parsed counter except idle; total adds idle back in.
269 this_cpu.totalUsage =
270 this_cpu.user + this_cpu.nice + this_cpu.system + this_cpu.iowait + this_cpu.irq + this_cpu.softirq;
271 this_cpu.total = this_cpu.totalUsage + this_cpu.idle;
// Walks /proc/stat line by line and tests each line for a leading "cpu" prefix.
// NOTE(review): what is done with a matching line is not visible here; presumably the per-CPU
// lines are counted to produce the returned size_t -- confirm against the full source.
276 size_t artdaq::SystemMetricCollector::GetCPUCount_()
279 std::ifstream file(
"/proc/stat");
282 while (std::getline(file, line))
// find(...) == 0 is true only when the line starts with "cpu".
289 if (line.find(
"cpu") == 0)
// Parses /proc/net/dev into a netstats snapshot: output.stats gets one entry per parsed row, keyed
// by interface name, and output.collectionTime is set to the time at which parsing started.
301 artdaq::SystemMetricCollector::netstats artdaq::SystemMetricCollector::ReadProcNetDev_()
// NOTE(review): no null check on filp is visible here -- confirm.
303 auto filp = fopen(
"/proc/net/dev",
"r");
304 char buf[200], ifname_c[20];
305 auto start_time = std::chrono::steady_clock::now();
// Consume the first two lines before the per-interface rows (presumably the column headers).
309 for (
int i = 0; i < 2; i++)
311 fgets(buf, 200, filp);
314 uint64_t rbytes, rerrs, rdrop, rfifo, rframe, tbytes, terrs, tdrop, tfifo, tcolls, tcarrier;
316 while (fgets(buf, 200, filp) !=
nullptr)
// The text before ':' is the interface name; fields written as %*u are parsed and discarded.
// NOTE(review): the %[^:] scanset has no field width while ifname_c holds 20 chars, and the sscanf
// return value is not checked here, so a short row would leave values from a previous iteration
// (or uninitialized ones) in place -- confirm whether this needs hardening.
318 sscanf(buf,
" %[^:]: %lu %*u %lu %lu %lu %lu %*u %*u %lu %*u %lu %lu %lu %lu %lu", ifname_c, &rbytes, &rerrs,
319 &rdrop, &rfifo, &rframe, &tbytes, &terrs, &tdrop, &tfifo, &tcolls, &tcarrier);
321 std::string ifname(ifname_c);
// Every receive-side error counter is folded into one number, and likewise for the send side.
324 auto total_rerrs = rerrs + rdrop + rfifo + rframe;
325 auto total_terrs = terrs + tdrop + tfifo + tcolls + tcarrier;
326 stat.recv_bytes = rbytes;
327 stat.send_bytes = tbytes;
328 stat.send_errs = total_terrs;
329 stat.recv_errs = total_rerrs;
331 output.stats[ifname] = stat;
333 output.collectionTime = start_time;
// Refreshes the pair of network snapshots: the current snapshot becomes lastNetStat_ and a fresh
// read of /proc/net/dev (via ReadProcNetDev_) becomes thisNetStat_.
339 void artdaq::SystemMetricCollector::UpdateNetstat_()
341 auto start_time = std::chrono::steady_clock::now();
// Elapsed time since the current snapshot was collected, expressed in seconds as a double.
// NOTE(review): the value it is compared against is cut off here; the method descriptions mention
// a 1.0 s collection interval -- confirm the threshold.
343 if (std::chrono::duration_cast<std::chrono::duration<
double, std::ratio<1>>>(start_time - thisNetStat_.collectionTime)
346 auto output = ReadProcNetDev_();
347 lastNetStat_ = thisNetStat_;
348 thisNetStat_ = output;
uint64_t GetNetworkTCPRetransSegs()
Return the current total number of TCP segments retransmitted, in segments
Report the sum of all values. Use for counters to report accurate results.
uint64_t GetTotalRAM()
Get the total amount of RAM in the system
uint64_t GetNetworkReceiveErrors(std::string ifname)
Get the number of network receive errors in the last network collection interval (1.0 s)
uint64_t GetNetworkSendBytes(std::string ifname)
Get the amount of data sent to the network in the last network collection interval (1.0 s)
uint64_t GetProcessMemUsage()
Get the amount of RAM being used by this process
Report the sum of all values divided by the length of the time interval they were accumulated over. Use to create rates from counters.
Report only the last value recorded. Useful for event counters, run numbers, etc. ...
std::list< std::unique_ptr< MetricData > > SendMetrics()
Send the configured metrics
uint64_t GetNetworkSendErrors(std::string ifname)
Get the number of network send errors in the last network collection interval (1.0 s) ...
double GetAvailableRAMPercent(bool buffers)
Get the percentage of available RAM
std::list< std::string > GetNetworkInterfaceNames()
Get the names of the local network interfaces.
void GetSystemCPUUsage()
Calculate the system CPU usage percentages
uint64_t GetAvailableRAM()
Get the amount of available RAM in the system
Small structure used to hold a metric data point before sending to the metric plugins ...
double GetProcessCPUUsagePercent()
Return the current amount of CPU usage for the current process, %
Report the average of all values. Use for rates to report accurate results.
SystemMetricCollector(bool processMetrics, bool systemMetrics)
SystemMetricCollector Constructor
double GetProcessMemUsagePercent()
Get the percentage of total system RAM being used by this process
uint64_t GetNetworkReceiveBytes(std::string ifname)
Get the amount of data received from the network in the last network collection interval (1.0 s)
uint64_t GetBufferedRAM()
Get the amount of RAM currently being used for cache