artdaq_utilities  v1_05_00
SystemMetricCollector.cc
1 #include "trace.h"
2 
3 #include <chrono>
4 #include "SystemMetricCollector.hh"
5 #include "sys/sysinfo.h"
6 #include "sys/types.h"
7 #include "unistd.h"
8 
namespace {
// Metric levels passed as the level argument of every MetricData built in SendMetrics().
// Typed, file-local constants replace the former object-like macros (same names, same values).
constexpr int MLEVEL_PROCESS = 6;  ///< Level for the per-process CPU and RAM metrics
constexpr int MLEVEL_CPU = 7;      ///< Level for the system CPU usage metric
constexpr int MLEVEL_RAM = 8;      ///< Level for the system RAM metrics
constexpr int MLEVEL_NETWORK = 9;  ///< Level for the network byte and error metrics
}  // namespace
13 
14 artdaq::SystemMetricCollector::SystemMetricCollector(bool processMetrics, bool systemMetrics)
15  : lastCPU_(), lastProcessCPUTimes_(), lastProcessCPUTime_(0), sendProcessMetrics_(processMetrics), sendSystemMetrics_(systemMetrics)
16 {
17  lastCPU_ = ReadProcStat_();
18  lastProcessCPUTime_ = times(&lastProcessCPUTimes_);
19  thisNetStat_ = ReadProcNetDev_();
20  lastNetStat_ = thisNetStat_;
21 }
22 
23 double artdaq::SystemMetricCollector::GetSystemCPUUsagePercent()
24 {
25  auto thisCPU = ReadProcStat_();
26  auto totalUsage = thisCPU.totalUsage - lastCPU_.totalUsage;
27  auto total = thisCPU.total - lastCPU_.total;
28  lastCPU_ = thisCPU;
29  return totalUsage * 100.0 / static_cast<double>(total);
30 }
31 
32 double artdaq::SystemMetricCollector::GetProcessCPUUsagePercent()
33 {
34  struct tms this_times;
35  auto now = times(&this_times);
36 
37  if (now < 0)
38  {
39  return 0.0;
40  }
41  auto delta_t = now - lastProcessCPUTime_;
42  auto utime = this_times.tms_utime - lastProcessCPUTimes_.tms_utime;
43  auto stime = this_times.tms_stime - lastProcessCPUTimes_.tms_stime;
44 
45  lastProcessCPUTime_ = now;
46  lastProcessCPUTimes_ = this_times;
47 
48  return utime + stime * 100.0 / static_cast<double>(delta_t);
49 }
50 
51 unsigned long artdaq::SystemMetricCollector::GetAvailableRAM()
52 {
53  struct sysinfo meminfo;
54  auto err = sysinfo(&meminfo);
55  if (err == 0)
56  {
57  return meminfo.freeram * meminfo.mem_unit;
58  }
59  return 0;
60 }
61 
62 unsigned long artdaq::SystemMetricCollector::GetBufferedRAM()
63 {
64  struct sysinfo meminfo;
65  auto err = sysinfo(&meminfo);
66  if (err == 0)
67  {
68  return meminfo.bufferram * meminfo.mem_unit;
69  }
70  return 0;
71 }
72 
73 unsigned long artdaq::SystemMetricCollector::GetTotalRAM()
74 {
75  struct sysinfo meminfo;
76  auto err = sysinfo(&meminfo);
77  if (err == 0)
78  {
79  return meminfo.totalram * meminfo.mem_unit;
80  }
81  return 0;
82 }
83 
84 double artdaq::SystemMetricCollector::GetAvailableRAMPercent(bool buffers)
85 {
86  struct sysinfo meminfo;
87  auto err = sysinfo(&meminfo);
88  if (err == 0)
89  {
90  auto available = meminfo.freeram + (buffers ? meminfo.bufferram : 0);
91  return available * 100.0 / static_cast<double>(meminfo.totalram);
92  }
93  return 0.0;
94 }
95 
96 unsigned long artdaq::SystemMetricCollector::GetProcessMemUsage()
97 {
98  auto filp = fopen("/proc/self/statm", "r");
99  unsigned long mem;
100  fscanf(filp, "%*u %lu", &mem);
101  fclose(filp);
102  return mem * sysconf(_SC_PAGESIZE);
103 }
104 
105 double artdaq::SystemMetricCollector::GetProcessMemUsagePercent()
106 {
107  auto proc = GetProcessMemUsage();
108  auto total = GetTotalRAM();
109  return proc * 100.0 / static_cast<double>(total);
110 }
111 
112 unsigned long artdaq::SystemMetricCollector::GetNetworkReceiveBytes()
113 {
114  UpdateNetstat_();
115  return thisNetStat_.recv_bytes - lastNetStat_.recv_bytes;
116 }
117 
118 unsigned long artdaq::SystemMetricCollector::GetNetworkSendBytes()
119 {
120  UpdateNetstat_();
121  return thisNetStat_.send_bytes - lastNetStat_.send_bytes;
122 }
123 
124 unsigned long artdaq::SystemMetricCollector::GetNetworkReceiveErrors()
125 {
126  UpdateNetstat_();
127  return thisNetStat_.recv_errs - lastNetStat_.recv_errs;
128 }
129 
130 unsigned long artdaq::SystemMetricCollector::GetNetworkSendErrors()
131 {
132  UpdateNetstat_();
133  return thisNetStat_.send_errs - lastNetStat_.send_errs;
134 }
135 
136 std::list<std::unique_ptr<artdaq::MetricData>> artdaq::SystemMetricCollector::SendMetrics()
137 {
138  auto start_time = std::chrono::steady_clock::now();
139  std::list<std::unique_ptr<MetricData>> output;
140  if (sendProcessMetrics_)
141  {
142  output.emplace_back(new MetricData("Process CPU Usage", GetProcessCPUUsagePercent(), "%", MLEVEL_PROCESS, MetricMode::Average, "", false));
143  output.emplace_back(new MetricData("Process RAM Usage", GetProcessMemUsage(), "B", MLEVEL_PROCESS, MetricMode::LastPoint, "", false));
144  }
145  if (sendSystemMetrics_)
146  {
147  output.emplace_back(new MetricData("System CPU Usage", GetSystemCPUUsagePercent(), "%", MLEVEL_CPU, MetricMode::Average, "", false));
148 
149  output.emplace_back(new MetricData("Free RAM", GetAvailableRAM(), "B", MLEVEL_RAM, MetricMode::LastPoint, "", false));
150  output.emplace_back(new MetricData("Total RAM", GetTotalRAM(), "B", MLEVEL_RAM, MetricMode::LastPoint, "", false));
151  output.emplace_back(new MetricData("Available RAM", GetAvailableRAMPercent(true), "%", MLEVEL_RAM, MetricMode::LastPoint, "", false));
152 
153  output.emplace_back(new MetricData("Network Receive Rate", GetNetworkReceiveBytes(), "B", MLEVEL_NETWORK, MetricMode::Rate, "", false));
154  output.emplace_back(new MetricData("Network Send Rate", GetNetworkSendBytes(), "B", MLEVEL_NETWORK, MetricMode::Rate, "", false));
155  output.emplace_back(new MetricData("Network Send Errors", GetNetworkSendErrors(), "Errors", MLEVEL_NETWORK, MetricMode::Accumulate, "", false));
156  output.emplace_back(new MetricData("Network Receive Errors", GetNetworkReceiveErrors(), "Errors", MLEVEL_NETWORK, MetricMode::Accumulate, "", false));
157  }
158 
159  TLOG(TLVL_DEBUG)
160  << "Time to collect system metrics: "
161  << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - start_time).count()
162  << " us.";
163  return output;
164 }
165 
166 artdaq::SystemMetricCollector::cpustat artdaq::SystemMetricCollector::ReadProcStat_()
167 {
168  auto filp = fopen("/proc/stat", "r");
169  cpustat this_cpu;
170 
171  fscanf(filp, "cpu %llu %llu %llu %llu %llu %llu %llu", &this_cpu.user, &this_cpu.nice, &this_cpu.system,
172  &this_cpu.idle, &this_cpu.iowait, &this_cpu.irq, &this_cpu.softirq);
173  fclose(filp);
174 
175  this_cpu.totalUsage =
176  this_cpu.user + this_cpu.nice + this_cpu.system + this_cpu.iowait + this_cpu.irq + this_cpu.softirq;
177  this_cpu.total = this_cpu.totalUsage + this_cpu.idle;
178 
179  return this_cpu;
180 }
181 
182 artdaq::SystemMetricCollector::netstat artdaq::SystemMetricCollector::ReadProcNetDev_()
183 {
184  auto filp = fopen("/proc/net/dev", "r");
185  char buf[200], ifname[20];
186  netstat output;
187  auto start_time = std::chrono::steady_clock::now();
188 
189  // skip first two lines
190  for (int i = 0; i < 2; i++)
191  {
192  fgets(buf, 200, filp);
193  }
194 
195  unsigned long rbytes, rerrs, rdrop, rfifo, rframe, tbytes, terrs, tdrop, tfifo, tcolls, tcarrier;
196 
197  while (fgets(buf, 200, filp))
198  {
199  sscanf(buf, "%[^:]: %lu %*u %lu %lu %lu %lu %*u %*u %lu %*u %lu %lu %lu %lu %lu", ifname, &rbytes, &rerrs,
200  &rdrop, &rfifo, &rframe, &tbytes, &terrs, &tdrop, &tfifo, &tcolls, &tcarrier);
201 
202  if (ifname[0] == 'e')
203  {
204  auto total_rerrs = rerrs + rdrop + rfifo + rframe;
205  auto total_terrs = terrs + tdrop + tfifo + tcolls + tcarrier;
206  output.recv_bytes += rbytes;
207  output.send_bytes += tbytes;
208  output.send_errs += total_terrs;
209  output.recv_errs += total_rerrs;
210  }
211  }
212  output.collectionTime = start_time;
213  fclose(filp);
214 
215  return output;
216 }
217 
218 void artdaq::SystemMetricCollector::UpdateNetstat_()
219 {
220  auto start_time = std::chrono::steady_clock::now();
221  // Only collect network stats once per second
222  if (std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(start_time - thisNetStat_.collectionTime)
223  .count() > 1.0)
224  {
225  auto output = ReadProcNetDev_();
226  lastNetStat_ = thisNetStat_;
227  thisNetStat_ = output;
228  }
229 }
Report the sum of all values. Use for counters to report accurate results.
Report only the last value recorded. Useful for event counters, run numbers, etc. ...
Report the average of all values. Use for rates to report accurate results.