artdaq  v3_07_02
periodic_cmd_stats.cc
1 // This file (periodic_cmd_stats.cc) was created by Ron Rechenmacher <ron@fnal.gov> on
2 // Jan 5, 2017. "TERMS AND CONDITIONS" governing this file are in the README
3 // or COPYING file. If you do not have such a file, one can be obtained by
4 // contacting Ron or Fermi Lab in Batavia IL, 60510, phone: 630-840-3000.
5 // $RCSfile: periodic_cmd_stats.cc,v $
6 static const char *rev = "$Revision: 1.19 $$Date: 2018/06/28 21:14:28 $";
7 
8 // make periodic_cmd_stats
9 // OR make periodic_cmd_stats CXX="g++ -std=c++0x -DDO_TRACE"
10 
11 #define USAGE \
12  "\
13  usage: %s --cmd=<cmd>\n\
14 examples: %s --cmd='sleep 25' # this will take about a minute\n\
15  %s --cmd='taskset -c 4 dd if=/dev/zero of=/dev/sdf bs=50M count=500 oflag=direct' --disk=sdf\n\
16  %s --cmd='dd if=/dev/zero of=t.dat count=500' --stat='md*MB/s?/proc/diskstats?/md[0-3]/?$10?(1.0/2048)?yes'\n\
17  gnuplot -e png=0 `/bin/ls -t periodic_*_stats.out|head -1`\n\
18 For each cmd, record CPU info. Additionally, record total system CPU\n\
19 (can be >100 w/ multicore), file system Cached and Dirty %%, plus any\n\
20 other stats specified by --stats options\n\
21 options:\n\
22 --cmd= can have multiple\n\
23 --Cmd= run cmd, but dont graph CPU (can have multiple)\n\
24 --pid= graph comma or space sep. list of pids (getting cmd from /proc/)\n\
25 --disk= automatically build --stat for disk(s)\n\
26 --stat= desc?file?linespec?fieldspec?multiplier?rate\n\
27  builtin = CPUnode,Cached,Dirty\n\
28  cmd-builtin = CPUcmdN, CPU+cmdN\n\
29 --out-dir= output dir (for 2+ output files; stats and cmd+)\n\
30 \n\
31 --period= (float) default=%s\n\
32 --pre= the number of periods wait before exec of --cmd\n\
33 --post= the number of periods to loop after cmd exits\n\
34 --graph= add to graph any possibly included non-graphed metrics\n\
35 \n\
36 --no-defaults for no default stats (ie. cpu-parent,cpu-children)\n\
37 --init= default dropcache if root. NOT implemented yet\n\
38 --duration=\n\
39 --comment=\n\
40 --yrange=400:1400\n\
41 --y2max=\n\
42 --y2incr=\n\
43 --pause=0\n\
44 --sys-iowait,-w include the system iowait on the graph\n\
45 --cmd-iowait include cmd iowait on the graph\n\
46 --fault,-f\n\
47 ", \
48  basename(argv[0]), basename(argv[0]), basename(argv[0]), basename(argv[0]), opt_period
49 
// Positions of the '?'-separated fields inside a --stat specification:
//   desc?file?linespec?fieldspec?multiplier?rate
enum
{
	s_desc = 0,        // description / column name
	s_file = 1,        // file the value is read from
	s_linespec = 2,    // awk pattern selecting the line(s)
	s_fieldspec = 3,   // awk expression selecting the field(s)
	s_multiplier = 4,  // awk expression giving the scale factor
	s_rate = 5         // "yes"/"no" rate flag
};
59 
60 #include <fcntl.h> // O_WRONLY|O_CREAT, open
61 #include <getopt.h> // getopt_long, {no,required,optional}_argument, extern char *optarg; extern int opt{ind,err,opt}
62 #include <signal.h> /* sigaction, siginfo_t, sigset_t */
63 #include <stdio.h> // printf
64 #include <sys/time.h> /* gettimeofday, timeval */
65 #include <sys/utsname.h> // uname
66 #include <sys/wait.h> // wait
67 #include <unistd.h> // getpid, sysconf
68 #include <fstream> // std::ifstream
69 #include <sstream> // std::stringstream
70 #include <string>
71 #include <thread>
72 #include <vector>
73 
74 #ifdef DO_TRACE
75 #define TRACE_NAME "periodic_cmd_stats"
76 #include "trace.h"
77 #else
78 #include <stdarg.h> /* va_list */
79 #include <string.h> /* memcpy */
80 #include <string>
// Fallback tracing used when DO_TRACE is not defined: messages go to stdout.
//
// trace_ap formats the printf-style message `msg` with the arguments in `ap`
// and guarantees that the printed text ends with exactly one added newline
// when the message does not already end with one.
// The va_list is owned by the caller, which is responsible for va_end().
// Messages longer than the local buffer are truncated.
static void trace_ap(const char *msg, va_list ap)
{
	char m_[1024];
	size_t len = strlen(msg);
	if (len > sizeof(m_) - 2)
		len = sizeof(m_) - 2;  // leave room for the optional '\n' and the NUL
	memcpy(m_, msg, len);
	if (len == 0 || m_[len - 1] != '\n')
		m_[len++] = '\n';
	m_[len] = '\0';  // always terminate, including the truncated case
	vprintf(m_, ap);
}
static void trace_p(const char *msg, ...) __attribute__((format(printf, 1, 2)));
// printf-style trace taking a C-string format.
static void trace_p(const char *msg, ...)
{
	va_list ap;
	va_start(ap, msg);
	trace_ap(msg, ap);
	va_end(ap);
}
// printf-style trace taking a std::string format (allows "text" + std::string
// expressions at the call site). The parameter is taken by value because
// va_start must not be used with a reference parameter.
static void trace_p(const std::string msg, ...)
{
	va_list ap;
	va_start(ap, msg);
	trace_ap(msg.c_str(), ap);
	va_end(ap);
}
106 #define TRACE(lvl, ...) \
107  do \
108  if (lvl <= 1) trace_p(__VA_ARGS__); \
109  while (0)
110 #define TRACE_CNTL(xyzz, ...)
111 #endif
112 
113 /* GLOBALS */
// Command-line option state, filled in by parse_args().
int opt_v{1};                 // verbosity: ++ for -v, -- for -q
char *opt_init{nullptr};      // argument of --init
std::vector<std::string> opt_cmd;  // one entry per --cmd
std::vector<std::string> opt_Cmd;  // one entry per --Cmd
std::string opt_pid;          // comma separated list built from --pid
std::string opt_disk;         // comma separated list built from --disk
std::string opt_stats;        // comma separated list built from --stat
std::string opt_outdir;       // --out-dir with a trailing '/', empty by default
std::string opt_graph{"CPUnode,Cached,Dirty,Free"};  // CPU+ will always be graphed
const char *opt_period{"5.0"};
std::string opt_comment;
int opt_pre{6};               // number of periods to sleepB4exec
int opt_post{6};
int opt_ymin{0};
int opt_ymax{2000};
int opt_yincr{200};
int opt_y2max{200};
int opt_y2incr{20};
int opt_sys_iowait{0};
int opt_cmd_iowait{0};
int opt_fault{0};

// Process ids that are being monitored.
std::vector<pid_t> g_pid_vec;
137 
// Replace, in place, every occurrence of `oldc` in the NUL-terminated
// string `instr` with `newc`.
void charreplace(char *instr, char oldc, char newc)
{
	for (char *cursor = instr; *cursor != '\0'; ++cursor)
	{
		if (*cursor == oldc)
		{
			*cursor = newc;
		}
	}
}
147 
148 void parse_args(int argc, char *argv[])
149 {
150  char *cp;
151  // parse opt, optargs, and args
152  while (1)
153  {
154  int opt;
155  static struct option long_options[] = {
156  // name has_arg *flag val
157  {"help", no_argument, 0, 'h'},
158  {"init", required_argument, 0, 'i'},
159  {"cmd", required_argument, 0, 'c'},
160  {"Cmd", required_argument, 0, 'C'},
161  {"disk", required_argument, 0, 'd'},
162  {"stat", required_argument, 0, 's'},
163  {"out-dir", required_argument, 0, 'o'},
164  {"period", required_argument, 0, 'p'},
165  {"sys-iowait", no_argument, 0, 'w'},
166  {"fault", no_argument, 0, 'f'},
167  {"pid", required_argument, 0, 'P'},
168  {"ymax", required_argument, 0, 1},
169  {"yincr", required_argument, 0, 2},
170  {"y2max", required_argument, 0, 3},
171  {"y2incr", required_argument, 0, 4},
172  {"pre", required_argument, 0, 5},
173  {"post", required_argument, 0, 6},
174  {"graph", required_argument, 0, 7},
175  {"yrange", required_argument, 0, 8},
176  {"comment", required_argument, 0, 9},
177  {"cmd-iowait", no_argument, 0, 10},
178  {0, 0, 0, 0}};
179  opt = getopt_long(argc, argv, "?hvqVi:c:C:d:s:o:p:P:wf",
180  long_options, NULL);
181  if (opt == -1) break;
182  switch (opt)
183  {
184  case '?':
185  case 'h':
186  printf(USAGE);
187  exit(0);
188  break;
189  case 'V':
190  printf("%s\n", rev);
191  exit(0);
192  break;
193  case 'v':
194  ++opt_v;
195  break;
196  case 'q':
197  --opt_v;
198  break;
199  case 'i':
200  opt_init = optarg;
201  break;
202  case 'c':
203  opt_cmd.push_back(optarg);
204  break;
205  case 'C':
206  opt_Cmd.push_back(optarg);
207  break;
208  case 'd':
209  if (opt_disk.size())
210  opt_disk = opt_disk + "," + optarg;
211  else
212  opt_disk = optarg;
213  break;
214  case 's':
215  if (opt_stats.size())
216  opt_stats = opt_stats + "," + optarg;
217  else
218  opt_stats = optarg;
219  break;
220  case 'o':
221  opt_outdir = std::string(optarg) + "/";
222  break;
223  case 'p':
224  opt_period = optarg;
225  break;
226  case 'w':
227  opt_sys_iowait = 1;
228  break;
229  case 'f':
230  opt_fault = 1;
231  break;
232  case 'P':
233  charreplace(optarg, ' ', ',');
234  if (opt_pid.size())
235  opt_pid = opt_pid + "," + optarg;
236  else
237  opt_pid = optarg;
238  break;
239  case 1:
240  opt_ymax = strtoul(optarg, NULL, 0);
241  break;
242  case 2:
243  opt_yincr = strtoul(optarg, NULL, 0);
244  break;
245  case 3:
246  opt_y2max = strtoul(optarg, NULL, 0);
247  break;
248  case 4:
249  opt_y2incr = strtoul(optarg, NULL, 0);
250  break;
251  case 5:
252  opt_pre = strtoul(optarg, NULL, 0);
253  break;
254  case 6:
255  opt_post = strtoul(optarg, NULL, 0);
256  break;
257  case 7:
258  opt_graph += std::string(",") + optarg;
259  break;
260  case 8:
261  opt_ymin = strtoul(optarg, NULL, 0);
262  cp = strstr(optarg, ":") + 1;
263  opt_ymax = strtoul(cp, NULL, 0);
264  if ((cp = strstr(cp, ":")))
265  {
266  ++cp;
267  opt_yincr = strtoul(strstr(cp, ":") + 1, NULL, 0);
268  }
269  else
270  opt_yincr = (opt_ymax - opt_ymin) / 5;
271  break;
272  case 9:
273  opt_comment = optarg;
274  break;
275  case 10:
276  opt_cmd_iowait = 1;
277  break;
278  default:
279  printf("?? getopt returned character code 0%o ??\n", opt);
280  exit(1);
281  }
282  }
283 } /* parse_args */
284 
285 void perror_exit(const char *msg, ...)
286 {
287  char buf[1024];
288  va_list ap;
289  va_start(ap, msg);
290  vsnprintf(buf, sizeof(buf), msg, ap);
291  va_end(ap);
292  TRACE(0, "%s", buf);
293  perror(buf);
294  exit(1);
295 }
296 
297 //void atfork_trace(void) { TRACE( 3, "process %d forking", getpid() ); }
298 /* iofd is in/out
299  if iofd[x]==-1 then create a pipe for that index, x, and return the appropriate pipe fd in iofd[x]
300  else if iofd[x]!=x, dup2(iofd[x],x)
301  else inherit
302  Could add ==-2, then close???
303  */
304 pid_t fork_execv(int close_start, int close_cnt, int sleepB4exec_us, int iofd[3], const char *cmd, char *const argv[], char *const env[])
305 {
306  int pipes[3][2];
307  int lcl_iofd[3];
308  for (auto ii = 0; ii < 3; ++ii)
309  {
310  lcl_iofd[ii] = iofd[ii];
311  if (iofd[ii] == -1)
312  {
313  pipe(pipes[ii]); /* pipes[ii][0] refers to the read end */
314  iofd[ii] = ii == 0 ? pipes[ii][1] : pipes[ii][0];
315  }
316  }
317  pid_t pid = fork();
318  if (pid < 0)
319  perror_exit("fork");
320  else if (pid == 0)
321  { /* child */
322  if (lcl_iofd[0] == -1)
323  { // deal with child stdin
324  close(pipes[0][1]); // child closes write end of pipe which will be it's stdin
325  int fd = dup2(pipes[0][0], 0);
326  TRACE(3, "fork_execv dupped(%d) onto %d (should be 0)", pipes[0][0], fd);
327  close(pipes[0][0]);
328  }
329  if (sleepB4exec_us)
330  {
331  // Do sleep before dealing with stdout/err incase we want TRACE to go to console
332  //int sts=pthread_atfork( atfork_trace, NULL, NULL );
333  usleep(sleepB4exec_us);
334  TRACE(1, "fork_execv sleep complete. sleepB4exec_us=%d sts=%d", sleepB4exec_us, 0 /*sts*/);
335  }
336  for (auto ii = 1; ii < 3; ++ii)
337  { // deal with child stdout/err
338  if (lcl_iofd[ii] == -1)
339  {
340  close(pipes[ii][0]);
341  int fd = dup2(pipes[ii][1], ii);
342  TRACE(3, "fork_execv dupped(%d) onto %d (should be %d)", pipes[ii][1], fd, ii);
343  close(pipes[ii][1]);
344  }
345  else if (lcl_iofd[ii] != ii)
346  {
347  int fd = dup2(lcl_iofd[ii], ii);
348  TRACE(3, "fork_execv dupped(%d) onto %d (should be %d)", pipes[ii][1], fd, ii);
349  }
350  }
351  for (auto ii = close_start; ii < (close_start + close_cnt); ++ii)
352  close(ii);
353  if (env)
354  execve(cmd, argv, env);
355  else
356  execv(cmd, argv);
357  exit(1);
358  }
359  else
360  { // parent
361  for (auto ii = 0; ii < 3; ++ii)
362  if (lcl_iofd[ii] == -1)
363  close(ii == 0 ? pipes[ii][0] : pipes[ii][1]);
364  }
365 
366  TRACE(3, "fork_execv pid=%d", pid);
367  return pid;
368 } // fork_execv
369 
// Return the numeric value of pointer X with its 8 bytes in reversed order
// (a 64-bit byte swap). Used to print addresses in trace messages.
uint64_t swapPtr(void *X)
{
	uint64_t x = reinterpret_cast<uintptr_t>(X);
	// swap 32-bit halves, then 16-bit words within each half, then bytes
	x = (x & 0x00000000ffffffffULL) << 32 | (x & 0xffffffff00000000ULL) >> 32;
	x = (x & 0x0000ffff0000ffffULL) << 16 | (x & 0xffff0000ffff0000ULL) >> 16;
	x = (x & 0x00ff00ff00ff00ffULL) << 8 | (x & 0xff00ff00ff00ff00ULL) >> 8;
	return x;
}
378 
379 /*
380  * Input to AWK can either be a file spec or a string.
381  * If input is string, the fork_execv call is told to create pipe for input.
382  *
 * The run time duration of the AWK process can be determined via TRACE:
384 /home/ron/src
385 mu2edaq01 :^) tshow|egrep 'AWK b4 |AWK after read' |tdelta -d 1 -post /b4/ -stats | tail
386 1013 1489724640538688 2047 1116418481 13521 0 6 3 . AWK b4 fork_execv input=(nil)
387 1018 1489724640536624 1969 1111669678 13521 0 6 3 . AWK b4 fork_execv input=(nil)
388 1023 1489724640534717 1866 1107283893 13521 0 6 3 . AWK b4 fork_execv input=(nil)
389 1032 1489724640531756 2289 1100474359 13521 0 13 3 . AWK b4 fork_execv input=(nil)
390 cpu="0"
391  min 1821
392  max 49210
393  tot 293610
394  ave 2645.1351
395  cnt 111
396 --2017-03-17_08:13:23--
397  */
398 static int g_devnullfd = -1;
399 
400 // Run the awk script specified in awk_cmd on the file
401 std::string AWK(std::string const &awk_cmd, const char *file, const char *input)
402 {
403  char readbuf[1024];
404  ssize_t bytes = 0, tot_bytes = 0;
405  char *const argv_[4] = {(char *)"/bin/gawk",
406  (char *)awk_cmd.c_str(),
407  (char *)file,
408  NULL};
409  pid_t pid;
410  ;
411  int infd = 0;
412  if (g_devnullfd == -1)
413  g_devnullfd = open("/dev/null", O_WRONLY);
414  if (input != NULL)
415  {
416  infd = -1;
417  }
418  //int iofd[3]={infd,-1,g_devnullfd};
419  int iofd[3] = {infd, -1, 2}; // make stdin=infd, create pipr for stdout, inherit stderr
420  TRACE(3, "AWK b4 fork_execv input=%p", (void *)input);
421  char *env[1];
422  env[0] = NULL; // mainly do not want big LD_LIBRARY_PATH
423  pid = fork_execv(0, 0 /*closeCnt*/, 0, iofd, "/bin/gawk", argv_, env);
424  if (input /*||iofd[0]!=0*/)
425  {
426  int xx = strlen(input);
427  int sts = write(iofd[0], input, xx);
428  if (sts != xx)
429  perror("write AWK stdin");
430  close(iofd[0]);
431  while ((bytes = read(iofd[1], &readbuf[tot_bytes], sizeof(readbuf) - tot_bytes)) != 0)
432  {
433  TRACE(3, "AWK while bytes=read > 0 bytes=%zd readbuf=0x%016lx errno=%d", bytes, swapPtr(&readbuf[tot_bytes]), errno);
434  if (bytes == -1)
435  {
436  if (errno == EINTR) continue;
437  break;
438  }
439  tot_bytes += bytes;
440  }
441  TRACE(3, "AWK after read tot=" + std::to_string((long long unsigned)tot_bytes) + " bytes=" + std::to_string((long long unsigned)bytes) + " input=" + std::string(input));
442  }
443  else
444  {
445  while ((bytes = read(iofd[1], &readbuf[tot_bytes], sizeof(readbuf) - tot_bytes)) > 0)
446  tot_bytes += bytes;
447  TRACE(3, "AWK after read tot=%zd bytes=%zd [0]=0x%x input=%p", tot_bytes, bytes, readbuf[0], (void *)input);
448  }
449  readbuf[tot_bytes >= 0 ? tot_bytes : 0] = '\0';
450  close(iofd[1]);
451  TRACE(3, "AWK after close child stdout. child pid=%d", pid);
452 #if 0
453  int status;
454  pid_t done_pid = waitpid(pid,&status,0);
455  TRACE( 3, "AWK after wait pid=%d done_pid=%d status=%d(0x%x)"
456  , pid, done_pid, status, status );
457 #endif
458  return std::string(readbuf);
459 } // AWK
460 
// Split `instr` at every `delim` and append the pieces, in order, to
// `outvec` (existing elements are kept). Empty pieces are preserved, so an
// empty input appends a single empty string and a trailing delimiter
// appends a trailing empty string.
void string_addto_vector(std::string &instr, std::vector<std::string> &outvec, char delim)
{
	std::string::size_type field_begin = 0;
	for (;;)
	{
		const std::string::size_type field_end = instr.find(delim, field_begin);
		if (field_end == std::string::npos)
		{
			// last (possibly empty) field
			outvec.push_back(instr.substr(field_begin));
			return;
		}
		outvec.push_back(instr.substr(field_begin, field_end - field_begin));
		field_begin = field_end + 1;
	}
}
472 
473 uint64_t gettimeofday_us(void) //struct timespec *ts )
474 {
475  struct timeval tv;
476  gettimeofday(&tv, NULL);
477  // if (ts) {
478  // ts->tv_sec = tv.tv_sec;
479  // ts->tv_nsec = tv.tv_usec * 1000;
480  // }
481  return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
482 } /* gettimeofday_us */
483 
484 #define DATA_START " DATA START"
485 #define GNUPLOT_PREFIX (const char *) \
486  "\
487 #!/usr/bin/env gnuplot\n\
488 # ./$0\n\
489 # OR\n\
490 # gnuplot -e 'ymin=400;ymax=1400' ./$0\n\
491 # OR try\n\
492 # gnuplot -e 'duration_s=35;set multiplot' ./gnuplot.gnuplot ./gnuplot.1.gnuplot -e 'set nomultiplot;pause -1'\n\
493 if(!exists('ARG0')) ARG0='' # for version 4, use: gnuplot -e ARG0=hello\n\
494 print 'ARG0=',ARG0 # ARG0.. automatically define in gnuplot version 5+\n\
495 if(!exists('ymin')) ymin=%d\n\
496 if(!exists('ymax')) ymax=%d\n\
497 if(!exists('yincr')) yincr=%d\n\
498 if(!exists('y2max')) y2max=%d\n\
499 if(!exists('y2incr')) y2incr=%d\n\
500 if(!exists('png')) png=1\n\
501 if(!exists('duration_s')) duration_s=0\n\
502 if(!exists('width')) width=512\n\
503 if(!exists('height')) height=384\n\
504 thisPid=system('echo `ps -p$$ -oppid=`')\n\
505 thisFile=system('ls -l /proc/'.thisPid.\"/fd | grep -v pipe: | tail -1 | sed -e 's/.*-> //'\")\n\
506 \n\
507 set title \"Disk Write Rate and %%CPU vs. time\\n%s %s %s%s\" # cmd and/or comment at end\n\
508 set xdata time\n\
509 tfmt='%%Y-%%m-%%dT%%H:%%M:%%S' # try to use consistent format\n\
510 set timefmt '%%Y-%%m-%%dT%%H:%%M:%%S'\n\
511 set xlabel 'time'\n\
512 set grid xtics back\n\
513 xstart=system(\"awk '/^....-..-..T/{print$1;exit}' \".thisFile)\n\
514 xend=system(\"awk 'END{print$1}' \".thisFile)\n\
515 print 'xstart='.xstart.' xend='.xend.' duration=',strptime(tfmt,xend)-strptime(tfmt,xstart)\n\
516 if(duration_s>0) end_t=strptime(tfmt,xstart)+duration_s; else end_t=strptime(tfmt,xend)\n\
517 set xrange [xstart:end_t]\n\
518 \n\
519 set ylabel '%s'\n\
520 set ytics nomirror\n\
521 if(ymax==0) set yrange [ymin:*];\\\n\
522 else set yrange [ymin:ymax];set ytics yincr\n\
523 set grid ytics back\n\
524 \n\
525 set y2label '%%CPU, %%MemTotal'\n\
526 set y2tics autofreq\n\
527 if(y2max==0) set y2range [0:*];\\\n\
528 else set y2range [0:y2max];set y2tics y2incr\n\
529 set pointsize .6\n\
530 \n\
531 if(png==1) set terminal png size width,height;\\\n\
532  pngfile=system( 'echo `basename '.thisFile.' .out`.png' );\\\n\
533  set output pngfile;\\\n\
534 else set terminal x11 size width,height\n\
535 \n\
536 plot \"< awk '/^#" DATA_START "/,/NEVER HAPPENS/' \".thisFile "
537 
538 void sigchld_sigaction(int signo, siginfo_t *info, void *context __attribute__((__unused__)))
539 {
540  /* see man sigaction for description of siginfo_t */
541  for (size_t ii = 0; ii < g_pid_vec.size(); ++ii)
542  {
543  pid_t pid = g_pid_vec[ii];
544  if (pid == info->si_pid)
545  {
546  TRACE(2, "sigchld_sigaction signo=%d status=%d(0x%x) code=%d(0x%x) sending_pid=%d", signo, info->si_status, info->si_status, info->si_code, info->si_code, info->si_pid);
547  return;
548  }
549  }
550  TRACE(3, "sigchld_sigaction signo=%d status=%d(0x%x) code=%d(0x%x) sending_pid=%d", signo, info->si_status, info->si_status, info->si_code, info->si_code, info->si_pid);
551 }
552 
553 void read_proc_file(const char *file, char *buffer, int buffer_size)
554 {
555  TRACE(4, "read_proc_file b4 open proc file" + std::string(file));
556  int fd = open(file, O_RDONLY);
557  int offset = 0, sts = 0;
558  while (1)
559  {
560  sts = read(fd, &buffer[offset], buffer_size - offset);
561  if (sts <= 0)
562  {
563  sts = 0;
564  break;
565  }
566  offset += sts;
567  }
568  buffer[sts + offset] = '\0';
569  close(fd);
570  TRACE(4, "read_proc_file after close " + std::string(file) + " read=%d offset=%d", sts, offset);
571 }
572 
573 pid_t check_pid_vec(void)
574 {
575  for (size_t ii = 0; ii < g_pid_vec.size();)
576  {
577  pid_t pid = g_pid_vec[ii];
578  int status;
579  pid_t pp = waitpid(pid, &status, WNOHANG);
580  TRACE(3, "check_pid_vec %d=waitpid(pid=%d) errno=%d", pp, pid, errno);
581  if (pp > 0)
582  g_pid_vec.erase(g_pid_vec.begin() + ii);
583  else if (pp == -1)
584  {
585  if (errno == ECHILD && kill(pid, 0) == 0)
586  // there is a process, but not my child process
587  ++ii;
588  else
589  // some other error
590  g_pid_vec.erase(g_pid_vec.begin() + ii);
591  }
592  else
593  ++ii;
594  }
595  if (g_pid_vec.size() == 0)
596  return -1;
597  else
598  return 0;
599 }
600 
601 void cleanup(void)
602 {
603  TRACE(1, "atexit cleanup g_pid_vec.size()=%zd\n", g_pid_vec.size());
604  for (std::vector<pid_t>::iterator pid = g_pid_vec.begin(); pid != g_pid_vec.end(); ++pid)
605  {
606  kill(*pid, SIGHUP);
607  }
608 }
609 #if (defined(__cplusplus) && (__cplusplus >= 201103L)) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
610 #pragma GCC diagnostic push
611 #pragma GCC diagnostic ignored "-Wunused-parameter" /* b/c of TRACE_XTRA_UNUSED */
612 #endif
613 void sigint_sigaction(int signo, siginfo_t *info, void *context)
614 {
615  cleanup();
616  exit(1);
617 }
618 #if (defined(__cplusplus) && (__cplusplus >= 201103L)) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
619 #pragma GCC diagnostic pop
620 #endif
621 
622 int main(int argc, char *argv[])
623 {
624  struct timeval tv;
625  int post_periods_completed = 0;
626  parse_args(argc, argv);
627  if ((argc - optind) != 0 || (opt_cmd.size() == 0 && opt_pid.size() == 0))
628  { //(argc-optind) is the number of non-opt args supplied.
629  int ii;
630  printf("unexpected argument(s) %d!=0\n", argc - optind);
631  for (ii = 0; (optind + ii) < argc; ++ii)
632  printf("arg%d=%s\n", ii + 1, argv[optind + ii]);
633  printf(USAGE);
634  exit(0);
635  }
636 
637  std::vector<std::string> graphs;
638  string_addto_vector(opt_graph, graphs, ',');
639 
640  char motherboard[1024] = {0};
641  if (getuid() == 0)
642  {
643  FILE *fp = popen("dmidecode | grep -m2 'Product Name:' | tail -1", "r");
644  fread(motherboard, 1, sizeof(motherboard), fp);
645  pclose(fp);
646  }
647  TRACE(1, "main - motherboard=" + std::string(motherboard));
648 
649  /* Note, when doing "waitpid" the wait would sometimes take a "long"
650  time (10's to 100's milliseconds; rcu???) If signal is generated
651  (i.e SA_NOCLDWAIT w/ sigchld_sigaction (not SIG_IGN)), it would
652  sometimes effect the read or write calls for the following AWK forks.
653  So, use SIG_IGN+SA_NOCLDWAIT.
654  */
655  struct sigaction sigaction_s;
656 #ifndef DO_SIGCHLD
657 #define DO_SIGCHLD 1
658 #endif
659 #if DO_SIGCHLD
660  sigaction_s.sa_sigaction = sigchld_sigaction;
661  sigaction_s.sa_flags = SA_SIGINFO | SA_NOCLDWAIT;
662 #else
663  sigaction_s.sa_handler = SIG_IGN;
664  sigaction_s.sa_flags = SA_NOCLDWAIT;
665 #endif
666  sigemptyset(&sigaction_s.sa_mask);
667  sigaction(SIGCHLD, &sigaction_s, NULL);
668 
669  sigaction_s.sa_sigaction = sigint_sigaction;
670  sigaction_s.sa_flags = SA_SIGINFO;
671  sigaction(SIGINT, &sigaction_s, NULL);
672 
673  //may return 0 when not able to detect
674  //long long unsigned concurentThreadsSupported = std::thread::hardware_concurrency();
675  long long unsigned concurentThreadsSupported = sysconf(_SC_NPROCESSORS_ONLN);
676  //TRACE_CNTL( "reset" ); TRACE_CNTL( "modeM", 1L );
677  TRACE(0, "main concurentThreadsSupported=%u opt_stats=" + opt_stats, concurentThreadsSupported);
678 
679  char run_time[80];
680  gettimeofday(&tv, NULL);
681  strftime(run_time, sizeof(run_time), "%FT%H%M%S", localtime(&tv.tv_sec));
682  TRACE(0, "main run_time=" + std::string(run_time));
683 
684  // get hostname
685  struct utsname ubuf;
686  uname(&ubuf);
687  char *dot;
688  if ((dot = strchr(ubuf.nodename, '.')) != NULL)
689  *dot = '\0';
690  std::string hostname(ubuf.nodename);
691  TRACE(1, "release=" + std::string(ubuf.release) + " version=" + std::string(ubuf.version));
692 
693  // get system mem (KB)
694  std::string memKB = AWK("NR==1{print$2;exit}", "/proc/meminfo", NULL);
695  memKB = memKB.substr(0, memKB.size() - 1); // remove trailing newline
696 
697  std::string dat_file_out(opt_outdir + "periodic_" + run_time + "_" + hostname + "_stats.out");
698 
699  double period = atof(opt_period);
700 
701  atexit(cleanup);
702  pid_t pp;
703  std::vector<std::string> pidfile;
704 
705  std::vector<std::string> stats;
706 
707  // For each cmd: create out file, fork process (with delay param),
708  // add to stats vec to get CPU info, add to graphs vec to plot cmd CPU
709  for (size_t ii = 0; ii < opt_cmd.size(); ++ii)
710  {
711  char cmd_file_out[1024];
712  snprintf(cmd_file_out, sizeof(cmd_file_out), "%speriodic_%s_%s_cmd%zd.out", opt_outdir.c_str(), run_time, hostname.c_str(), ii);
713  int fd = open(cmd_file_out, O_WRONLY | O_CREAT, 0666);
714  TRACE(0, "main fd=%d opt_cmd=" + opt_cmd[ii] + " cmd_file_out=" + std::string(cmd_file_out), fd);
715  int iofd[3] = {0, fd, fd}; // redirect stdout/err to the cmd-out-file
716  char *const argv_[4] = {(char *)"/bin/sh",
717  (char *)"-c",
718  (char *)opt_cmd[ii].c_str(),
719  NULL};
720  g_pid_vec.push_back(fork_execv(0, 0, (int)(period * opt_pre * 1e6), iofd, "/bin/sh", argv_, NULL));
721  close(fd); // the output file has been given to the subprocess
722  std::string pidstr = std::to_string((long long int)g_pid_vec[ii]);
723  pidfile.push_back("/proc/" + pidstr + "/stat");
724  //pidfile.push_back( "/proc/"+pidstr+"/task/"+pidstr+"/stat" );
725  char desc[128], ss[1024];
726  // field 14-17: Documentation/filesystems/proc.txt Table 1-4: utime stime cutime cstime
727  snprintf(ss, sizeof(ss), "CPUcmd%zd?%s?NR==1?$14+$15?1?yes", ii, pidfile[ii].c_str());
728  stats.push_back(ss);
729 
730  snprintf(desc, sizeof(desc), "CPU+cmd%zd", ii);
731  graphs.push_back(desc); // cmd0 is in the GNUPLOT_PREFIX
732  snprintf(ss, sizeof(ss), "%s?%s?NR==1?$14+$15+16+$17?1?yes", desc, pidfile[ii].c_str());
733  stats.push_back(ss);
734 
735  snprintf(desc, sizeof(desc), "WaitBlkIOcmd%zd", ii);
736  if (opt_cmd_iowait) graphs.push_back(desc);
737  snprintf(ss, sizeof(ss), "%s?%s?NR==1?$42?1?yes", desc, pidfile[ii].c_str());
738  stats.push_back(ss);
739 
740  snprintf(desc, sizeof(desc), "Faultcmd%zd", ii);
741  if (opt_fault) graphs.push_back(desc);
742  snprintf(ss, sizeof(ss), "%s?%s?NR==1?$10+$11+$12+$13?4096.0/1048576?yes", desc, pidfile[ii].c_str());
743  stats.push_back(ss);
744  }
745  for (size_t ii = 0; ii < opt_Cmd.size(); ++ii)
746  {
747  char cmd_file_out[1024];
748  snprintf(cmd_file_out, sizeof(cmd_file_out), "%speriodic_%s_%s_cmd%zd.out", opt_outdir.c_str(), run_time, hostname.c_str(), ii + opt_cmd.size());
749  int fd = open(cmd_file_out, O_WRONLY | O_CREAT, 0666);
750  TRACE(0, "main fd=%d opt_Cmd=" + opt_Cmd[ii] + " cmd_file_out=" + std::string(cmd_file_out), fd);
751  int iofd[3] = {0, fd, fd}; // redirect stdout/err to the cmd-out-file
752  char *const argv_[4] = {(char *)"/bin/sh",
753  (char *)"-c",
754  (char *)opt_Cmd[ii].c_str(),
755  NULL};
756  g_pid_vec.push_back(fork_execv(0, 0, (int)(period * opt_pre * 1e6), iofd, "/bin/sh", argv_, NULL));
757  close(fd); // the output file has been given to the subprocess
758  std::string pidstr = std::to_string((long long int)g_pid_vec[ii]);
759  pidfile.push_back("/proc/" + pidstr + "/stat");
760  //pidfile.push_back( "/proc/"+pidstr+"/task/"+pidstr+"/stat" );
761  char desc[128], ss[1024];
762  snprintf(desc, sizeof(desc), "CPU+cmd%zd", ii + opt_cmd.size());
763  snprintf(ss, sizeof(ss), "CPUcmd%zd?%s?NR==1?$14+$15?1?yes", ii + opt_cmd.size(), pidfile[ii].c_str());
764  stats.push_back(ss);
765  snprintf(ss, sizeof(ss), "CPU+cmd%zd?%s?NR==1?$14+$15+16+$17?1?yes", ii + opt_cmd.size(), pidfile[ii].c_str());
766  stats.push_back(ss);
767  // JUST DONT ADD THESE TO graphs
768  }
769  std::vector<std::string> pids;
770  if (opt_pid.size())
771  string_addto_vector(opt_pid, pids, ',');
772  for (size_t ii = 0; ii < pids.size(); ++ii)
773  {
774  g_pid_vec.push_back(std::stoi(pids[ii]));
775  TRACE(1, "pid=%s g_pid_vec.size()=%ld", pids[ii].c_str(), g_pid_vec.size());
776  pidfile.push_back("/proc/" + pids[ii] + "/stat");
777  char desc[128], ss[1024];
778  // field 14-17: Documentation/filesystems/proc.txt Table 1-4: utime stime cutime cstime
779  snprintf(ss, sizeof(ss), "CPUpid%zd?%s?NR==1?$14+$15?1?yes", ii, pidfile[ii].c_str());
780  stats.push_back(ss);
781 
782  std::ifstream t("/proc/" + pids[ii] + "/comm");
783  std::string comm((std::istreambuf_iterator<char>(t)),
784  std::istreambuf_iterator<char>());
785  comm = comm.substr(0, comm.size() - 1); // strip nl
786 
787  snprintf(desc, sizeof(desc), "CPU+pid%zd_%s", ii, comm.c_str());
788  graphs.push_back(desc); // cmd0 is in the GNUPLOT_PREFIX
789  snprintf(ss, sizeof(ss), "%s?%s?NR==1?$14+$15+16+$17?1?yes", desc, pidfile[ii].c_str());
790  stats.push_back(ss);
791 
792  snprintf(desc, sizeof(desc), "WaitBlkIOpid%zd", ii);
793  if (opt_cmd_iowait) graphs.push_back(desc);
794  snprintf(ss, sizeof(ss), "%s?%s?NR==1?$42?1?yes", desc, pidfile[ii].c_str());
795  stats.push_back(ss);
796 
797  snprintf(desc, sizeof(desc), "Faultpid%zd", ii);
798  if (opt_fault) graphs.push_back(desc);
799  snprintf(ss, sizeof(ss), "%s?%s?NR==1?$10+$11+$12+$13?4096.0/1048576?yes", desc, pidfile[ii].c_str());
800  stats.push_back(ss);
801  }
802 
803  stats.push_back("CPUnode");
804  stats.push_back("IOWait");
805  if (opt_sys_iowait) { graphs.push_back("IOWait"); }
806  stats.push_back("Cached");
807  stats.push_back("Dirty");
808  stats.push_back("Free");
809 
810  if (opt_disk.size())
811  {
812  std::vector<std::string> tmp;
813  string_addto_vector(opt_disk, tmp, ',');
814  for (std::vector<std::string>::iterator dk = tmp.begin(); dk != tmp.end(); ++dk)
815  {
816  // /proc/diskstat has 11 field after an initial 3 (14 total) for each device
817  // The 7th field after the device name (the 10th field total) is # of sectors written.
818  // Sectors appear to be 512 bytes. So, deviding by 2048 converts to MBs.
819  std::string statstr = *dk + "_wrMB/s?/proc/diskstats?/" + *dk + "/?$10?(1.0/2048)?yes";
820  stats.push_back(statstr);
821  std::vector<std::string> stat_spec;
822  string_addto_vector(statstr, stat_spec, '?');
823  graphs.push_back(stat_spec[s_desc]);
824 
825  statstr = *dk + "_rdMB/s?/proc/diskstats?/" + *dk + "/?$6?(1.0/2048)?yes";
826  stats.push_back(statstr);
827  stat_spec.clear();
828  string_addto_vector(statstr, stat_spec, '?');
829  //graphs.push_back( stat_spec[s_desc] ); // don't add read by default -- can be added with --graph
830  }
831  }
832 
833  if (opt_stats.size())
834  {
835  std::vector<std::string> tmp_stats;
836  string_addto_vector(opt_stats, tmp_stats, ',');
837  for (std::vector<std::string>::iterator st = tmp_stats.begin();
838  st != tmp_stats.end(); ++st)
839  {
840  stats.push_back(*st);
841  std::vector<std::string> stat_spec;
842  string_addto_vector(*st, stat_spec, '?');
843  graphs.push_back(stat_spec[s_desc]);
844  }
845  }
846 
847  std::vector<long> pre_vals;
848  std::vector<double> multipliers;
849  std::vector<std::vector<std::string>> spec2(stats.size());
850  std::vector<std::string> awkCmd;
851 
852  std::string header_str("#" DATA_START "\n#_______time_______");
853 
854  int outfd = open(dat_file_out.c_str(), O_WRONLY | O_CREAT, 0777);
855  //FILE *outfp=stdout;
856  FILE *outfp = fdopen(outfd, "w");
857 
858  std::string cmd_comment("");
859  if (opt_cmd.size())
860  cmd_comment += "\\ncmd: " + opt_cmd[0];
861  if (opt_comment.size())
862  cmd_comment += "\\ncomment: " + opt_comment;
863  fprintf(outfp, GNUPLOT_PREFIX, opt_ymin, opt_ymax, opt_yincr, opt_y2max, opt_y2incr, run_time, hostname.c_str(), ubuf.release, cmd_comment.c_str(), "disk write MB/s");
864 
865  uint64_t t_start = gettimeofday_us();
866 
867  // build header string and get initial values for "rate" stats
868  bool first_graph_spec_added = false;
869  for (size_t ii = 0; ii < stats.size(); ++ii)
870  {
871  std::vector<std::string> stat_spec;
872  string_addto_vector(stats[ii], stat_spec, '?');
873  if (stat_spec[s_desc] == "CPUnode" && stat_spec.size() == 1)
874  // Ref. Documentation/filesystems/proc.txt: user+nice+system (skip idle) +iowait+irq+softirq+steal (skip guest)
875  stats[ii] += "?/proc/stat?/^cpu[^0-9]/?$2+$3+$4+$6+$7+$8+$9?1.0/" + std::to_string(concurentThreadsSupported) + "?yes";
876  else if (stat_spec[s_desc] == "IOWait" && stat_spec.size() == 1)
877  stats[ii] += "?/proc/stat?/^cpu[^0-9]/?$6?1.0/" + std::to_string(concurentThreadsSupported) + "?yes";
878  else if (stat_spec[s_desc] == "Cached" && stat_spec.size() == 1)
879  stats[ii] += "?/proc/meminfo?/^(Cached|Buffers):/?$2?1?no";
880  else if (stat_spec[s_desc] == "Dirty" && stat_spec.size() == 1)
881  stats[ii] += "?/proc/meminfo?/^Dirty:/?$2?1?no";
882  else if (stat_spec[s_desc] == "Free" && stat_spec.size() == 1)
883  stats[ii] += "?/proc/meminfo?/^MemFree:/?$2?1?no";
884 
885  header_str += " " + stat_spec[s_desc];
886 
887  string_addto_vector(stats[ii], spec2[ii], '?');
888  char awk_cmd[1024];
889  snprintf(awk_cmd, sizeof(awk_cmd), "%s{vv+=%s}END{print vv}"
890  //snprintf( awk_cmd, sizeof(awk_cmd), "%s{vv+=%s;print \"vv now\",vv > \"/dev/stderr\";}END{print vv}"
891  ,
892  spec2[ii][s_linespec].c_str(), spec2[ii][s_fieldspec].c_str());
893  awkCmd.push_back(awk_cmd);
894 
895  std::string stat = AWK(awkCmd.back(), spec2[ii][s_file].c_str(), NULL);
896 
897  pre_vals.push_back(atol(stat.c_str()));
898  multipliers.push_back(atof(AWK("BEGIN{print " + spec2[ii][s_multiplier] + "}", "/dev/null", NULL).c_str()));
899  //fprintf( stderr, " l=%s", spec2[ii][s_linespec].c_str() );
900  for (size_t jj = 0; jj < graphs.size(); ++jj)
901  if (graphs[jj] == stat_spec[s_desc])
902  {
903  if (first_graph_spec_added) fprintf(outfp, ",\\\n '' ");
904  if (strncmp(stat_spec[s_desc].c_str(), "CPU", 3) == 0)
905  fprintf(outfp, "using 1:%zd title '%s' w linespoints axes x1y2", ii + 2, stat_spec[s_desc].c_str());
906  else if (stat_spec[s_desc] == "Cached" || stat_spec[s_desc] == "Dirty" || stat_spec[s_desc] == "Free")
907  fprintf(outfp, "using 1:($%zd/%s*100) title '%s%%' w linespoints axes x1y2", ii + 2, memKB.c_str(), stat_spec[s_desc].c_str());
908  else if (stat_spec[s_desc].substr(0, 6) == "CPUcmd" || stat_spec[s_desc].substr(0, 6) == "CPU+cm")
909  fprintf(outfp, "using 1:%zd title '%s' w linespoints axes x1y2", ii + 2, stat_spec[s_desc].c_str());
910  else if (stat_spec[s_desc].substr(0, 12) == "WaitBlkIOcmd")
911  fprintf(outfp, "using 1:%zd title '%s' w linespoints axes x1y2", ii + 2, stat_spec[s_desc].c_str());
912  else
913  fprintf(outfp, "using 1:%zd title '%s' w linespoints axes x1y1", ii + 2, stat_spec[s_desc].c_str());
914  first_graph_spec_added = true;
915  }
916  }
917  header_str += " #\n";
918 
919  fprintf(outfp,
920  "\nif(png==0) pause -1 'Press Enter/Return or ^C to finish'\n\
921 exit\n");
922 
923  // print the cmds
924  fprintf(outfp, "cmds:\n");
925  for (size_t ii = 0; ii < opt_cmd.size(); ++ii)
926  {
927  std::string ss = opt_cmd[ii] + "\n";
928  fprintf(outfp, "%s", ss.c_str());
929  }
930 
931  // print the specs
932  fprintf(outfp, "stats:\n");
933  for (size_t ii = 0; ii < stats.size(); ++ii)
934  {
935  std::string ss = stats[ii] + "\n";
936  fprintf(outfp, "%s", ss.c_str());
937  }
938 
939  // now print header
940  fprintf(outfp, "%s", header_str.c_str());
941  fflush(outfp);
942 
943  std::string tmpdbg("main lp=%d done stat%zd=%ld rate=%f ");
944  //char tmpdbgbuf[128];
945  char proc_stats[8192];
946  char *awk_in;
947  int lp;
948 
949  // - - - - - - - - - - - - - - - - - - - - - - - -
950  // wait a period and then start collecting the stats
951 eintr1:
952  int64_t t_sleep = (t_start + (uint64_t)(period * 1e6)) - gettimeofday_us();
953  if (t_sleep > 0)
954  {
955  int sts = usleep(t_sleep);
956  TRACE(3, "main usleep sts=%d errno=%d", sts, errno);
957  if (errno == EINTR)
958  goto eintr1;
959  }
960 
961 #define MAX_LP 600
962  for (lp = 2; lp < MAX_LP; ++lp)
963  {
964  char str[80];
965  gettimeofday(&tv, NULL);
966  strftime(str, sizeof(str), "%FT%T", localtime(&tv.tv_sec));
967  //fprintf(outfp, "%s.%ld", str, tv.tv_usec/100000 );
968  fprintf(outfp, "%s", str);
969  std::string prv_file("");
970  for (size_t ii = 0; ii < stats.size(); ++ii)
971  {
972  TRACE(3, "main lp=%d start stat%zd", lp, ii);
973  char const *awk_file;
974  if (ii < (2 * opt_cmd.size()))
975  { // For each cmd, the
976  // /proc/<pid>/stat file
977  // will be referenced twice.
978  if ((ii & 1) == 0)
979  {
980  read_proc_file(pidfile[ii / 2].c_str(), proc_stats, sizeof(proc_stats));
981  }
982  awk_in = proc_stats;
983  awk_file = NULL;
984  }
985  else if (spec2[ii][s_file] != prv_file)
986  {
987  prv_file = spec2[ii][s_file];
988  read_proc_file(spec2[ii][s_file].c_str(), proc_stats, sizeof(proc_stats));
989  awk_in = proc_stats;
990  awk_file = NULL;
991  }
992 
993  std::string stat_str = AWK(awkCmd[ii], awk_file, awk_in);
994 
995  long stat = atol(stat_str.c_str());
996 
997  if (spec2[ii][s_rate] == "yes")
998  {
999  double rate;
1000  if (stat_str != "\n")
1001  rate = (stat - pre_vals[ii]) * multipliers[ii] / period;
1002  else
1003  rate = 0.0;
1004  TRACE(3, tmpdbg + "stat_str[0]=0x%x stat_str.size()=%zd", lp, ii, stat, rate, stat_str[0], stat_str.size());
1005  fprintf(outfp, " %.2f", rate);
1006  if (rate < 0.0 && spec2[ii][s_file] == "/proc/diskstats")
1007  {
1008  TRACE(0, "main stat:" + spec2[ii][s_desc] + " rate=%f pre_val=%ld stat=%ld stat_str=\"" + stat_str + "\" awkCmd=" + awkCmd[ii] + " proc_diskstats=" + proc_stats, rate, pre_vals[ii], stat);
1009  //TRACE_CNTL( "modeM", 0L );
1010  }
1011  pre_vals[ii] = stat;
1012  }
1013  else
1014  {
1015  TRACE(3, "main lp=%d done stat%zd=%ld", lp, ii, stat);
1016  fprintf(outfp, " %.2f", stat * multipliers[ii]);
1017  }
1018  }
1019  fprintf(outfp, "\n");
1020  fflush(outfp);
1021  eintr2:
1022  int64_t t_sleep = (t_start + (uint64_t)(period * lp * 1000000)) - gettimeofday_us();
1023  if (t_sleep > 0)
1024  {
1025  int sts = usleep(t_sleep);
1026  TRACE(3, "main usleep sts=%d errno=%d", sts, errno);
1027  if (errno == EINTR)
1028  goto eintr2;
1029  }
1030  pp = check_pid_vec();
1031  TRACE(2, "main pp=%d t_sleep=%ld", pp, t_sleep);
1032  if (pp == -1)
1033  {
1034  if (post_periods_completed == 0)
1035  TRACE(1, "main processes complete - waiting %d post periods", opt_post);
1036  if (post_periods_completed++ == opt_post)
1037  break;
1038  }
1039  }
1040  if (lp == MAX_LP)
1041  {
1042  fprintf(outfp, "# MAX_LP abort\n");
1043  }
1044 
1045  //TRACE( 0, "main waiting for pid=%d", pid );
1046  //wait(&status);
1047  //TRACE( 0, "main status=%d",status );
1048  TRACE(0, "main done/complete/returning");
1049  //TRACE_CNTL( "modeM", 0L );
1050  return (0);
1051 } // main