artdaq  v3_04_01
periodic_cmd_stats.cc
1  // This file (periodic_cmd_stats.cc) was created by Ron Rechenmacher <ron@fnal.gov> on
2  // Jan 5, 2017. "TERMS AND CONDITIONS" governing this file are in the README
3  // or COPYING file. If you do not have such a file, one can be obtained by
4  // contacting Ron or Fermi Lab in Batavia IL, 60510, phone: 630-840-3000.
5  // $RCSfile: periodic_cmd_stats.cc,v $
6 static const char*rev="$Revision: 1.19 $$Date: 2018/06/28 21:14:28 $";
7 
8 // make periodic_cmd_stats
9 // OR make periodic_cmd_stats CXX="g++ -std=c++0x -DDO_TRACE"
10 
11 #define USAGE "\
12  usage: %s --cmd=<cmd>\n\
13 examples: %s --cmd='sleep 25' # this will take about a minute\n\
14  %s --cmd='taskset -c 4 dd if=/dev/zero of=/dev/sdf bs=50M count=500 oflag=direct' --disk=sdf\n\
15  %s --cmd='dd if=/dev/zero of=t.dat count=500' --stat='md*MB/s?/proc/diskstats?/md[0-3]/?$10?(1.0/2048)?yes'\n\
16  gnuplot -e png=0 `/bin/ls -t periodic_*_stats.out|head -1`\n\
17 For each cmd, record CPU info. Additionally, record total system CPU\n\
18 (can be >100 w/ multicore), file system Cached and Dirty %%, plus any\n\
19 other stats specified by --stats options\n\
20 options:\n\
21 --cmd= can have multiple\n\
22 --Cmd= run cmd, but dont graph CPU (can have multiple)\n\
23 --pid= graph comma or space sep. list of pids (getting cmd from /proc/)\n\
24 --disk= automatically build --stat for disk(s)\n\
25 --stat= desc?file?linespec?fieldspec?multiplier?rate\n\
26  builtin = CPUnode,Cached,Dirty\n\
27  cmd-builtin = CPUcmdN, CPU+cmdN\n\
28 --out-dir= output dir (for 2+ output files; stats and cmd+)\n\
29 \n\
30 --period= (float) default=%s\n\
31 --pre= the number of periods wait before exec of --cmd\n\
32 --post= the number of periods to loop after cmd exits\n\
33 --graph= add to graph any possibly included non-graphed metrics\n\
34 \n\
35 --no-defaults for no default stats (ie. cpu-parent,cpu-children)\n\
36 --init= default dropcache if root. NOT implemented yet\n\
37 --duration=\n\
38 --comment=\n\
39 --yrange=400:1400\n\
40 --y2max=\n\
41 --y2incr=\n\
42 --pause=0\n\
43 --sys-iowait,-w include the system iowait on the graph\n\
44 --cmd-iowait include cmd iowait on the graph\n\
45 --fault,-f\n\
46 ", basename(argv[0]),basename(argv[0]),basename(argv[0]),basename(argv[0]),opt_period
47 
// Indices of the '?'-separated fields of a stat specification
// ("desc?file?linespec?fieldspec?multiplier?rate", see USAGE).
enum {
    s_desc,       // name of the stat (column header / graph title)
    s_file,       // file handed to awk
    s_linespec,   // awk pattern selecting the line(s)
    s_fieldspec,  // awk expression that is summed over the selected lines
    s_multiplier, // awk expression evaluated once to get a scale factor
    s_rate        // "yes"/"no" -- presumably whether the value is reported as a rate; confirm in main
};
49 
#include <stdio.h> // printf
#include <errno.h> // errno, EINTR, ECHILD
#include <getopt.h> // getopt_long, {no,required,optional}_argument, extern char *optarg; extern int opt{ind,err,opt}
#include <unistd.h> // getpid, sysconf
#include <sys/wait.h> // wait
#include <fcntl.h> // O_WRONLY|O_CREAT, open
#include <sys/time.h> /* gettimeofday, timeval */
#include <signal.h> /* sigaction, siginfo_t, sigset_t */
#include <sys/utsname.h> // uname
#include <string>
#include <sstream> // std::stringstream
#include <vector>
#include <thread>
#include <fstream> // std::ifstream
63 
64 #ifdef DO_TRACE
65 # define TRACE_NAME "periodic_cmd_stats"
66 # include "trace.h"
67 #else
68 # include <stdarg.h> /* va_list */
69 # include <string.h> /* memcpy */
70 # include <string>
/* Print a printf-style message on stdout, making sure it ends with a newline.
 *   msg - printf format string; it is copied into a fixed local buffer, so a
 *         format longer than the buffer is truncated.
 *   ap  - argument list started by the caller.  The caller remains
 *         responsible for va_end(); it is deliberately not called here.
 */
static void trace_ap(const char *msg, va_list ap)
{
    char fmt[1024];
    const size_t max_len = sizeof(fmt) - 2; /* keep room for '\n' and the NUL */
    size_t len = strlen(msg);
    if (len > max_len)
        len = max_len;
    memcpy(fmt, msg, len);
    /* len==0 is checked first so an empty format never indexes fmt[-1] */
    if (len == 0 || fmt[len - 1] != '\n')
        fmt[len++] = '\n';
    fmt[len] = '\0'; /* always terminate, also when the format was truncated */
    vprintf(fmt, ap);
}
77 static void trace_p(const char*msg,...)__attribute__((format(printf,1,2)));
78 static void trace_p(const char*msg,...)
79 { va_list ap;va_start(ap,msg); trace_ap(msg ,ap); va_end(ap);
80 }
81 static void trace_p(const std::string msg,...)
82 { va_list ap;va_start(ap,msg); trace_ap(msg.c_str(),ap); va_end(ap);
83 }
84 # define TRACE(lvl,...) do if(lvl<=1)trace_p(__VA_ARGS__);while(0)
85 # define TRACE_CNTL( xyzz,... )
86 #endif
87 
88 /* GLOBALS */
// Command line option state.  The initializers are the defaults;
// parse_args() overwrites them from the command line.
int opt_v = 1;                        // -v increments, -q decrements
char *opt_init = NULL;                // --init argument
std::vector<std::string> opt_cmd;     // one entry per --cmd
std::vector<std::string> opt_Cmd;     // one entry per --Cmd
std::string opt_pid;                  // --pid values, comma separated
std::string opt_disk;                 // --disk values, comma separated
std::string opt_stats;                // --stat specs, comma separated
std::string opt_outdir;               // --out-dir with a trailing '/', empty by default
std::string opt_graph("CPUnode,Cached,Dirty,Free"); // CPU+ will always be graphed
const char *opt_period = "5.0";       // --period, kept as text (also printed by USAGE)
std::string opt_comment;              // --comment
int opt_pre = 6;                      // number of periods to sleepB4exec
int opt_post = 6;                     // --post
int opt_ymin = 0;                     // --yrange (first value)
int opt_ymax = 2000;                  // --ymax / --yrange (second value)
int opt_yincr = 200;                  // --yincr / --yrange (third value)
int opt_y2max = 200;                  // --y2max
int opt_y2incr = 20;                  // --y2incr
int opt_sys_iowait = 0;               // set by --sys-iowait / -w
int opt_cmd_iowait = 0;               // set by --cmd-iowait
int opt_fault = 0;                    // set by --fault / -f

// pids being watched; cleanup() sends SIGHUP to every entry.
std::vector<pid_t> g_pid_vec;
112 
// Replace, in place, every occurrence of oldc in the NUL-terminated
// string instr with newc.
void charreplace( char* instr, char oldc, char newc )
{
    for (char *cur = instr; *cur != '\0'; ++cur) {
        if (*cur == oldc) {
            *cur = newc;
        }
    }
}
121 
122 
123 void parse_args( int argc, char *argv[] )
124 {
125  char *cp;
126  // parse opt, optargs, and args
127  while (1) {
128  int opt;
129  static struct option long_options[] = {
130  // name has_arg *flag val
131  { "help", no_argument, 0, 'h' },
132  { "init", required_argument,0, 'i' },
133  { "cmd", required_argument,0, 'c' },
134  { "Cmd", required_argument,0, 'C' },
135  { "disk", required_argument,0, 'd' },
136  { "stat", required_argument,0, 's' },
137  { "out-dir", required_argument,0, 'o' },
138  { "period", required_argument,0, 'p' },
139  { "sys-iowait", no_argument, 0, 'w' },
140  { "fault", no_argument, 0, 'f' },
141  { "pid", required_argument,0, 'P' },
142  { "ymax", required_argument,0, 1 },
143  { "yincr", required_argument,0, 2 },
144  { "y2max", required_argument,0, 3 },
145  { "y2incr", required_argument,0, 4 },
146  { "pre", required_argument,0, 5 },
147  { "post", required_argument,0, 6 },
148  { "graph", required_argument,0, 7 },
149  { "yrange", required_argument,0, 8 },
150  { "comment", required_argument,0, 9 },
151  { "cmd-iowait", no_argument, 0, 10 },
152  { 0, 0, 0, 0 }
153  };
154  opt = getopt_long( argc, argv, "?hvqVi:c:C:d:s:o:p:P:wf",
155  long_options, NULL );
156  if (opt == -1) break;
157  switch (opt) {
158  case '?': case 'h': printf(USAGE); exit(0); break;
159  case 'V': printf("%s\n",rev); exit(0); break;
160  case 'v': ++opt_v; break;
161  case 'q': --opt_v; break;
162  case 'i': opt_init=optarg; break;
163  case 'c': opt_cmd.push_back(optarg); break;
164  case 'C': opt_Cmd.push_back(optarg); break;
165  case 'd': if(opt_disk.size())opt_disk=opt_disk+","+optarg;else opt_disk=optarg;break;
166  case 's': if(opt_stats.size())opt_stats=opt_stats+","+optarg;else opt_stats=optarg;break;
167  case 'o': opt_outdir=std::string(optarg)+"/"; break;
168  case 'p': opt_period=optarg; break;
169  case 'w': opt_sys_iowait=1; break;
170  case 'f': opt_fault=1; break;
171  case 'P': charreplace(optarg,' ',',');
172  if(opt_pid.size())opt_pid=opt_pid+","+optarg;
173  else opt_pid=optarg;
174  break;
175  case 1: opt_ymax=strtoul(optarg,NULL,0); break;
176  case 2: opt_yincr=strtoul(optarg,NULL,0); break;
177  case 3: opt_y2max=strtoul(optarg,NULL,0); break;
178  case 4: opt_y2incr=strtoul(optarg,NULL,0); break;
179  case 5: opt_pre=strtoul(optarg,NULL,0); break;
180  case 6: opt_post=strtoul(optarg,NULL,0); break;
181  case 7: opt_graph+=std::string(",")+optarg; break;
182  case 8: opt_ymin =strtoul(optarg,NULL,0);
183  cp = strstr(optarg,":")+1;
184  opt_ymax =strtoul(cp,NULL,0);
185  if ((cp=strstr(cp,":"))) {
186  ++cp;
187  opt_yincr=strtoul(strstr(cp,":")+1,NULL,0);
188  } else
189  opt_yincr=(opt_ymax-opt_ymin)/5;
190  break;
191  case 9: opt_comment=optarg; break;
192  case 10: opt_cmd_iowait=1; break;
193  default:
194  printf( "?? getopt returned character code 0%o ??\n", opt );
195  exit( 1 );
196  }
197  }
198 } /* parse_args */
199 
200 void perror_exit( const char *msg, ... )
201 { char buf[1024];
202  va_list ap;va_start(ap,msg);
203  vsnprintf( buf, sizeof(buf), msg, ap );
204  va_end(ap);
205  TRACE( 0, "%s", buf );
206  perror( buf ); exit(1);
207 }
208 
209 //void atfork_trace(void) { TRACE( 3, "process %d forking", getpid() ); }
210 /* iofd is in/out
211  if iofd[x]==-1 then create a pipe for that index, x, and return the appropriate pipe fd in iofd[x]
212  else if iofd[x]!=x, dup2(iofd[x],x)
213  else inherit
214  Could add ==-2, then close???
215  */
216 pid_t fork_execv( int close_start, int close_cnt, int sleepB4exec_us, int iofd[3], const char *cmd, char *const argv[], char *const env[] )
217 {
218  int pipes[3][2];
219  int lcl_iofd[3];
220  for (auto ii=0; ii<3; ++ii) {
221  lcl_iofd[ii]=iofd[ii];
222  if (iofd[ii]==-1) {
223  pipe(pipes[ii]); /* pipes[ii][0] refers to the read end */
224  iofd[ii]=ii==0?pipes[ii][1]:pipes[ii][0];
225  }
226  }
227  pid_t pid=fork();
228  if (pid < 0) perror_exit("fork");
229  else if (pid == 0) { /* child */
230  if (lcl_iofd[0]==-1) { // deal with child stdin
231  close(pipes[0][1]); // child closes write end of pipe which will be it's stdin
232  int fd=dup2(pipes[0][0],0);
233  TRACE( 3, "fork_execv dupped(%d) onto %d (should be 0)", pipes[0][0], fd );
234  close(pipes[0][0]);
235  }
236  if (sleepB4exec_us) {
237  // Do sleep before dealing with stdout/err incase we want TRACE to go to console
238  //int sts=pthread_atfork( atfork_trace, NULL, NULL );
239  usleep(sleepB4exec_us);
240  TRACE( 1, "fork_execv sleep complete. sleepB4exec_us=%d sts=%d", sleepB4exec_us, 0/*sts*/ );
241  }
242  for (auto ii=1; ii<3; ++ii) { // deal with child stdout/err
243  if (lcl_iofd[ii]==-1) {
244  close(pipes[ii][0]);
245  int fd=dup2(pipes[ii][1],ii);
246  TRACE( 3, "fork_execv dupped(%d) onto %d (should be %d)", pipes[ii][1], fd,ii );
247  close(pipes[ii][1]);
248  } else if (lcl_iofd[ii]!=ii) {
249  int fd=dup2(lcl_iofd[ii],ii);
250  TRACE( 3, "fork_execv dupped(%d) onto %d (should be %d)", pipes[ii][1], fd,ii );
251  }
252  }
253  for (auto ii=close_start; ii<(close_start+close_cnt); ++ii)
254  close(ii);
255  if (env)
256  execve( cmd, argv, env );
257  else
258  execv( cmd, argv );
259  exit(1);
260  } else { // parent
261  for (auto ii=0; ii<3; ++ii)
262  if (lcl_iofd[ii]==-1)
263  close(ii==0?pipes[ii][0]:pipes[ii][1]);
264  }
265 
266  TRACE( 3, "fork_execv pid=%d", pid );
267  return pid;
268 } // fork_execv
269 
// Return the value of pointer X as a 64-bit integer with its byte order
// reversed: the 32-bit halves are swapped, then the 16-bit quarters inside
// each half, then the bytes inside each quarter.
uint64_t swapPtr( void *X )
{
    uint64_t x = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(X));
    x = (x & 0x00000000ffffffffULL) << 32 | (x & 0xffffffff00000000ULL) >> 32;
    x = (x & 0x0000ffff0000ffffULL) << 16 | (x & 0xffff0000ffff0000ULL) >> 16;
    x = (x & 0x00ff00ff00ff00ffULL) << 8  | (x & 0xff00ff00ff00ff00ULL) >> 8;
    return x;
}
278 
279 /*
280  * Input to AWK can either be a file spec or a string.
281  * If input is string, the fork_execv call is told to create pipe for input.
282  *
 * The run time duration of the AWK process can be determined via TRACE:
284 /home/ron/src
285 mu2edaq01 :^) tshow|egrep 'AWK b4 |AWK after read' |tdelta -d 1 -post /b4/ -stats | tail
286 1013 1489724640538688 2047 1116418481 13521 0 6 3 . AWK b4 fork_execv input=(nil)
287 1018 1489724640536624 1969 1111669678 13521 0 6 3 . AWK b4 fork_execv input=(nil)
288 1023 1489724640534717 1866 1107283893 13521 0 6 3 . AWK b4 fork_execv input=(nil)
289 1032 1489724640531756 2289 1100474359 13521 0 13 3 . AWK b4 fork_execv input=(nil)
290 cpu="0"
291  min 1821
292  max 49210
293  tot 293610
294  ave 2645.1351
295  cnt 111
296 --2017-03-17_08:13:23--
297  */
298 static int g_devnullfd=-1;
299 
300 // Run the awk script specified in awk_cmd on the file
301 std::string AWK( std::string const &awk_cmd, const char *file, const char *input )
302 {
303  char readbuf[1024];
304  ssize_t bytes=0, tot_bytes=0;
305  char *const argv_[4]={ (char*)"/bin/gawk",
306  (char*)awk_cmd.c_str(),
307  (char*)file,
308  NULL };
309  pid_t pid;;
310  int infd=0;
311  if (g_devnullfd == -1)
312  g_devnullfd=open("/dev/null",O_WRONLY);
313  if (input != NULL) {
314  infd=-1;
315  }
316  //int iofd[3]={infd,-1,g_devnullfd};
317  int iofd[3]={infd,-1,2};// make stdin=infd, create pipr for stdout, inherit stderr
318  TRACE( 3, "AWK b4 fork_execv input=%p", (void*)input );
319  char *env[1];
320  env[0]=NULL; // mainly do not want big LD_LIBRARY_PATH
321  pid=fork_execv(0,0/*closeCnt*/,0,iofd,"/bin/gawk",argv_,env);
322  if(input/*||iofd[0]!=0*/) {
323  int xx=strlen(input);
324  int sts=write(iofd[0],input,xx);
325  if (sts != xx)
326  perror("write AWK stdin");
327  close(iofd[0]);
328  while ((bytes=read(iofd[1],&readbuf[tot_bytes],sizeof(readbuf)-tot_bytes)) != 0) {
329  TRACE( 3, "AWK while bytes=read > 0 bytes=%zd readbuf=0x%016lx errno=%d", bytes, swapPtr(&readbuf[tot_bytes]), errno );
330  if (bytes == -1) {
331  if (errno == EINTR) continue;
332  break;
333  }
334  tot_bytes+=bytes;
335  }
336  TRACE( 3, "AWK after read tot="+std::to_string((long long unsigned)tot_bytes)+" bytes="+std::to_string((long long unsigned)bytes)+" input="+std::string(input) );
337  } else {
338  while ((bytes=read(iofd[1],&readbuf[tot_bytes],sizeof(readbuf)-tot_bytes)) > 0)
339  tot_bytes+=bytes;
340  TRACE( 3, "AWK after read tot=%zd bytes=%zd [0]=0x%x input=%p", tot_bytes, bytes, readbuf[0], (void*)input );
341  }
342  readbuf[tot_bytes>=0?tot_bytes:0]='\0';
343  close(iofd[1]);
344  TRACE( 3, "AWK after close child stdout. child pid=%d", pid );
345 #if 0
346  int status;
347  pid_t done_pid = waitpid(pid,&status,0);
348  TRACE( 3, "AWK after wait pid=%d done_pid=%d status=%d(0x%x)"
349  , pid, done_pid, status, status );
350 #endif
351  return std::string(readbuf);
352 } // AWK
353 
354 
// Split instr at every delim and _append_ the pieces to outvec (existing
// elements are kept).  Empty pieces are kept too: an empty instr adds one
// empty string, and a trailing delim adds a final empty string.
void string_addto_vector( std::string &instr, std::vector<std::string> &outvec, char delim )
{
    std::string::size_type piece_start = 0;
    for (;;) {
        const std::string::size_type delim_pos = instr.find( delim, piece_start );
        if (delim_pos == std::string::npos) {
            outvec.push_back( instr.substr(piece_start) ); // last (or only) piece
            return;
        }
        outvec.push_back( instr.substr(piece_start, delim_pos-piece_start) );
        piece_start = delim_pos+1;
    }
}
365 
// Current time of day from gettimeofday(), as microseconds since the epoch.
uint64_t gettimeofday_us( void )
{
    struct timeval now;
    gettimeofday( &now, NULL );
    const uint64_t whole_seconds = (uint64_t)now.tv_sec;
    return whole_seconds*1000000 + now.tv_usec;
} /* gettimeofday_us */
375 
376 #define DATA_START " DATA START"
377 #define GNUPLOT_PREFIX (const char *)"\
378 #!/usr/bin/env gnuplot\n\
379 # ./$0\n\
380 # OR\n\
381 # gnuplot -e 'ymin=400;ymax=1400' ./$0\n\
382 # OR try\n\
383 # gnuplot -e 'duration_s=35;set multiplot' ./gnuplot.gnuplot ./gnuplot.1.gnuplot -e 'set nomultiplot;pause -1'\n\
384 if(!exists('ARG0')) ARG0='' # for version 4, use: gnuplot -e ARG0=hello\n\
385 print 'ARG0=',ARG0 # ARG0.. automatically define in gnuplot version 5+\n\
386 if(!exists('ymin')) ymin=%d\n\
387 if(!exists('ymax')) ymax=%d\n\
388 if(!exists('yincr')) yincr=%d\n\
389 if(!exists('y2max')) y2max=%d\n\
390 if(!exists('y2incr')) y2incr=%d\n\
391 if(!exists('png')) png=1\n\
392 if(!exists('duration_s')) duration_s=0\n\
393 if(!exists('width')) width=512\n\
394 if(!exists('height')) height=384\n\
395 thisPid=system('echo `ps -p$$ -oppid=`')\n\
396 thisFile=system('ls -l /proc/'.thisPid.\"/fd | grep -v pipe: | tail -1 | sed -e 's/.*-> //'\")\n\
397 \n\
398 set title \"Disk Write Rate and %%CPU vs. time\\n%s %s %s%s\" # cmd and/or comment at end\n\
399 set xdata time\n\
400 tfmt='%%Y-%%m-%%dT%%H:%%M:%%S' # try to use consistent format\n\
401 set timefmt '%%Y-%%m-%%dT%%H:%%M:%%S'\n\
402 set xlabel 'time'\n\
403 set grid xtics back\n\
404 xstart=system(\"awk '/^....-..-..T/{print$1;exit}' \".thisFile)\n\
405 xend=system(\"awk 'END{print$1}' \".thisFile)\n\
406 print 'xstart='.xstart.' xend='.xend.' duration=',strptime(tfmt,xend)-strptime(tfmt,xstart)\n\
407 if(duration_s>0) end_t=strptime(tfmt,xstart)+duration_s; else end_t=strptime(tfmt,xend)\n\
408 set xrange [xstart:end_t]\n\
409 \n\
410 set ylabel '%s'\n\
411 set ytics nomirror\n\
412 if(ymax==0) set yrange [ymin:*];\\\n\
413 else set yrange [ymin:ymax];set ytics yincr\n\
414 set grid ytics back\n\
415 \n\
416 set y2label '%%CPU, %%MemTotal'\n\
417 set y2tics autofreq\n\
418 if(y2max==0) set y2range [0:*];\\\n\
419 else set y2range [0:y2max];set y2tics y2incr\n\
420 set pointsize .6\n\
421 \n\
422 if(png==1) set terminal png size width,height;\\\n\
423  pngfile=system( 'echo `basename '.thisFile.' .out`.png' );\\\n\
424  set output pngfile;\\\n\
425 else set terminal x11 size width,height\n\
426 \n\
427 plot \"< awk '/^#" DATA_START "/,/NEVER HAPPENS/' \".thisFile "
428 
429 
430 void sigchld_sigaction( int signo, siginfo_t *info, void *context __attribute__((__unused__)) )
431 {
432  /* see man sigaction for description of siginfo_t */
433  for (size_t ii=0; ii < g_pid_vec.size(); ++ii) {
434  pid_t pid=g_pid_vec[ii];
435  if (pid == info->si_pid) {
436  TRACE( 2, "sigchld_sigaction signo=%d status=%d(0x%x) code=%d(0x%x) sending_pid=%d"
437  , signo
438  , info->si_status, info->si_status
439  , info->si_code, info->si_code
440  , info->si_pid
441  );
442  return;
443  }
444  }
445  TRACE( 3, "sigchld_sigaction signo=%d status=%d(0x%x) code=%d(0x%x) sending_pid=%d"
446  , signo
447  , info->si_status, info->si_status
448  , info->si_code, info->si_code
449  , info->si_pid
450  );
451 }
452 
453 
454 void read_proc_file( const char *file, char *buffer, int buffer_size )
455 {
456  TRACE( 4, "read_proc_file b4 open proc file"+std::string(file) );
457  int fd=open(file,O_RDONLY);
458  int offset=0, sts=0;
459  while (1) {
460  sts=read(fd,&buffer[offset],buffer_size-offset);
461  if (sts<=0) {
462  sts=0;
463  break;
464  }
465  offset+=sts;
466  }
467  buffer[sts+offset]='\0';
468  close(fd);
469  TRACE( 4, "read_proc_file after close "+std::string(file)+" read=%d offset=%d",sts,offset );
470 }
471 
472 
473 pid_t check_pid_vec( void )
474 {
475  for (size_t ii=0; ii < g_pid_vec.size(); ) {
476  pid_t pid=g_pid_vec[ii];
477  int status;
478  pid_t pp = waitpid( pid, &status, WNOHANG );
479  TRACE( 3, "check_pid_vec %d=waitpid(pid=%d) errno=%d", pp, pid, errno );
480  if (pp > 0)
481  g_pid_vec.erase( g_pid_vec.begin()+ii );
482  else if (pp == -1) {
483  if (errno == ECHILD && kill(pid,0)==0)
484  // there is a process, but not my child process
485  ++ii;
486  else
487  // some other error
488  g_pid_vec.erase( g_pid_vec.begin()+ii );
489  }
490  else
491  ++ii;
492  }
493  if (g_pid_vec.size() == 0)
494  return -1;
495  else
496  return 0;
497 }
498 
499 void cleanup( void )
500 {
501  TRACE( 1, "atexit cleanup g_pid_vec.size()=%zd\n", g_pid_vec.size() );
502  for (std::vector<pid_t>::iterator pid=g_pid_vec.begin(); pid!=g_pid_vec.end(); ++pid) {
503  kill( *pid, SIGHUP );
504  }
505 }
506 #if (defined(__cplusplus)&&(__cplusplus>=201103L)) || (defined(__STDC_VERSION__)&&(__STDC_VERSION__>=201112L))
507 # pragma GCC diagnostic push
508 # pragma GCC diagnostic ignored "-Wunused-parameter" /* b/c of TRACE_XTRA_UNUSED */
509 #endif
510 void sigint_sigaction( int signo, siginfo_t *info, void *context )
511 {
512  cleanup();
513  exit( 1 );
514 }
515 #if (defined(__cplusplus)&&(__cplusplus>=201103L)) || (defined(__STDC_VERSION__)&&(__STDC_VERSION__>=201112L))
516 # pragma GCC diagnostic pop
517 #endif
518 
519 
520 int
521 main( int argc
522  , char *argv[] )
523 {
524  struct timeval tv;
525  int post_periods_completed=0;
526  parse_args( argc, argv );
527  if ( (argc-optind)!=0
528  || ( opt_cmd.size()==0
529  && opt_pid.size()==0)) { //(argc-optind) is the number of non-opt args supplied.
530  int ii;
531  printf( "unexpected argument(s) %d!=0\n", argc-optind );
532  for (ii=0; (optind+ii)<argc; ++ii)
533  printf("arg%d=%s\n",ii+1,argv[optind+ii]);
534  printf( USAGE ); exit( 0 );
535  }
536 
537  std::vector<std::string> graphs;
538  string_addto_vector( opt_graph, graphs, ',' );
539 
540  char motherboard[1024]={0};
541  if (getuid() == 0) {
542  FILE *fp=popen( "dmidecode | grep -m2 'Product Name:' | tail -1", "r" );
543  fread( motherboard, 1, sizeof(motherboard), fp );
544  pclose( fp );
545  }
546  TRACE( 1, "main - motherboard="+std::string(motherboard) );
547 
548  /* Note, when doing "waitpid" the wait would sometimes take a "long"
549  time (10's to 100's milliseconds; rcu???) If signal is generated
550  (i.e SA_NOCLDWAIT w/ sigchld_sigaction (not SIG_IGN)), it would
551  sometimes effect the read or write calls for the following AWK forks.
552  So, use SIG_IGN+SA_NOCLDWAIT.
553  */
554  struct sigaction sigaction_s;
555 #ifndef DO_SIGCHLD
556 # define DO_SIGCHLD 1
557 #endif
558 #if DO_SIGCHLD
559  sigaction_s.sa_sigaction = sigchld_sigaction;
560  sigaction_s.sa_flags = SA_SIGINFO|SA_NOCLDWAIT;
561 #else
562  sigaction_s.sa_handler = SIG_IGN;
563  sigaction_s.sa_flags = SA_NOCLDWAIT;
564 #endif
565  sigemptyset(&sigaction_s.sa_mask);
566  sigaction( SIGCHLD, &sigaction_s, NULL );
567 
568  sigaction_s.sa_sigaction = sigint_sigaction;
569  sigaction_s.sa_flags = SA_SIGINFO;
570  sigaction( SIGINT, &sigaction_s, NULL );
571 
572  //may return 0 when not able to detect
573  //long long unsigned concurentThreadsSupported = std::thread::hardware_concurrency();
574  long long unsigned concurentThreadsSupported = sysconf(_SC_NPROCESSORS_ONLN);
575  //TRACE_CNTL( "reset" ); TRACE_CNTL( "modeM", 1L );
576  TRACE( 0, "main concurentThreadsSupported=%u opt_stats="+opt_stats, concurentThreadsSupported );
577 
578  char run_time[80];
579  gettimeofday( &tv, NULL );
580  strftime( run_time, sizeof(run_time), "%FT%H%M%S", localtime(&tv.tv_sec) );
581  TRACE( 0, "main run_time="+std::string(run_time) );
582 
583  // get hostname
584  struct utsname ubuf;
585  uname( &ubuf );
586  char *dot;
587  if ((dot=strchr(ubuf.nodename,'.')) != NULL)
588  *dot = '\0';
589  std::string hostname(ubuf.nodename);
590  TRACE( 1,"release="+std::string(ubuf.release)+" version="+std::string(ubuf.version) );
591 
592  // get system mem (KB)
593  std::string memKB=AWK( "NR==1{print$2;exit}","/proc/meminfo",NULL );
594  memKB = memKB.substr(0,memKB.size()-1); // remove trailing newline
595 
596  std::string dat_file_out(opt_outdir+"periodic_"+run_time+"_"+hostname+"_stats.out");
597 
598  double period=atof(opt_period);
599 
600  atexit( cleanup );
601  pid_t pp;
602  std::vector<std::string> pidfile;
603 
604  std::vector<std::string> stats;
605 
606  // For each cmd: create out file, fork process (with delay param),
607  // add to stats vec to get CPU info, add to graphs vec to plot cmd CPU
608  for (size_t ii=0; ii<opt_cmd.size(); ++ii) {
609  char cmd_file_out[1024];
610  snprintf( cmd_file_out, sizeof(cmd_file_out), "%speriodic_%s_%s_cmd%zd.out"
611  , opt_outdir.c_str(), run_time, hostname.c_str(), ii );
612  int fd=open( cmd_file_out, O_WRONLY|O_CREAT,0666 );
613  TRACE( 0, "main fd=%d opt_cmd="+opt_cmd[ii]+" cmd_file_out="+std::string(cmd_file_out), fd );
614  int iofd[3]={0,fd,fd}; // redirect stdout/err to the cmd-out-file
615  char *const argv_[4]={ (char*)"/bin/sh",
616  (char*)"-c",
617  (char*)opt_cmd[ii].c_str(),
618  NULL };
619  g_pid_vec.push_back( fork_execv(0,0,(int)(period*opt_pre*1e6),iofd,"/bin/sh",argv_,NULL) );
620  close(fd); // the output file has been given to the subprocess
621  std::string pidstr=std::to_string((long long int)g_pid_vec[ii]);
622  pidfile.push_back( "/proc/"+pidstr+"/stat" );
623  //pidfile.push_back( "/proc/"+pidstr+"/task/"+pidstr+"/stat" );
624  char desc[128], ss[1024];
625  // field 14-17: Documentation/filesystems/proc.txt Table 1-4: utime stime cutime cstime
626  snprintf( ss, sizeof(ss), "CPUcmd%zd?%s?NR==1?$14+$15?1?yes", ii, pidfile[ii].c_str() );
627  stats.push_back( ss );
628 
629  snprintf( desc, sizeof(desc), "CPU+cmd%zd", ii );
630  graphs.push_back( desc ); // cmd0 is in the GNUPLOT_PREFIX
631  snprintf( ss, sizeof(ss), "%s?%s?NR==1?$14+$15+16+$17?1?yes", desc, pidfile[ii].c_str() );
632  stats.push_back( ss );
633 
634  snprintf( desc, sizeof(desc), "WaitBlkIOcmd%zd", ii );
635  if (opt_cmd_iowait) graphs.push_back( desc );
636  snprintf( ss, sizeof(ss), "%s?%s?NR==1?$42?1?yes", desc, pidfile[ii].c_str() );
637  stats.push_back( ss );
638 
639  snprintf( desc, sizeof(desc), "Faultcmd%zd", ii );
640  if (opt_fault) graphs.push_back( desc );
641  snprintf( ss, sizeof(ss), "%s?%s?NR==1?$10+$11+$12+$13?4096.0/1048576?yes", desc, pidfile[ii].c_str() );
642  stats.push_back( ss );
643  }
644  for (size_t ii=0; ii<opt_Cmd.size(); ++ii) {
645  char cmd_file_out[1024];
646  snprintf( cmd_file_out, sizeof(cmd_file_out), "%speriodic_%s_%s_cmd%zd.out"
647  , opt_outdir.c_str(), run_time, hostname.c_str(), ii+opt_cmd.size() );
648  int fd=open( cmd_file_out, O_WRONLY|O_CREAT,0666 );
649  TRACE( 0, "main fd=%d opt_Cmd="+opt_Cmd[ii]+" cmd_file_out="+std::string(cmd_file_out), fd );
650  int iofd[3]={0,fd,fd}; // redirect stdout/err to the cmd-out-file
651  char *const argv_[4]={ (char*)"/bin/sh",
652  (char*)"-c",
653  (char*)opt_Cmd[ii].c_str(),
654  NULL };
655  g_pid_vec.push_back( fork_execv(0,0,(int)(period*opt_pre*1e6),iofd,"/bin/sh",argv_,NULL) );
656  close(fd); // the output file has been given to the subprocess
657  std::string pidstr=std::to_string((long long int)g_pid_vec[ii]);
658  pidfile.push_back( "/proc/"+pidstr+"/stat" );
659  //pidfile.push_back( "/proc/"+pidstr+"/task/"+pidstr+"/stat" );
660  char desc[128], ss[1024];
661  snprintf( desc, sizeof(desc), "CPU+cmd%zd", ii+opt_cmd.size() );
662  snprintf( ss, sizeof(ss), "CPUcmd%zd?%s?NR==1?$14+$15?1?yes", ii+opt_cmd.size(), pidfile[ii].c_str() );
663  stats.push_back( ss );
664  snprintf( ss, sizeof(ss), "CPU+cmd%zd?%s?NR==1?$14+$15+16+$17?1?yes", ii+opt_cmd.size(), pidfile[ii].c_str() );
665  stats.push_back( ss );
666  // JUST DONT ADD THESE TO graphs
667  }
668  std::vector<std::string> pids;
669  if (opt_pid.size())
670  string_addto_vector( opt_pid, pids, ',' );
671  for (size_t ii=0; ii<pids.size(); ++ii) {
672  g_pid_vec.push_back( std::stoi(pids[ii]) );
673  TRACE( 1, "pid=%s g_pid_vec.size()=%ld", pids[ii].c_str(), g_pid_vec.size() );
674  pidfile.push_back( "/proc/"+pids[ii]+"/stat" );
675  char desc[128], ss[1024];
676  // field 14-17: Documentation/filesystems/proc.txt Table 1-4: utime stime cutime cstime
677  snprintf( ss, sizeof(ss), "CPUpid%zd?%s?NR==1?$14+$15?1?yes", ii, pidfile[ii].c_str() );
678  stats.push_back( ss );
679 
680  std::ifstream t("/proc/"+pids[ii]+"/comm");
681  std::string comm((std::istreambuf_iterator<char>(t)),
682  std::istreambuf_iterator<char>());
683  comm = comm.substr(0,comm.size()-1); // strip nl
684 
685  snprintf( desc, sizeof(desc), "CPU+pid%zd_%s", ii, comm.c_str() );
686  graphs.push_back( desc ); // cmd0 is in the GNUPLOT_PREFIX
687  snprintf( ss, sizeof(ss), "%s?%s?NR==1?$14+$15+16+$17?1?yes", desc, pidfile[ii].c_str() );
688  stats.push_back( ss );
689 
690  snprintf( desc, sizeof(desc), "WaitBlkIOpid%zd", ii );
691  if (opt_cmd_iowait) graphs.push_back( desc );
692  snprintf( ss, sizeof(ss), "%s?%s?NR==1?$42?1?yes", desc, pidfile[ii].c_str() );
693  stats.push_back( ss );
694 
695  snprintf( desc, sizeof(desc), "Faultpid%zd", ii );
696  if (opt_fault) graphs.push_back( desc );
697  snprintf( ss, sizeof(ss), "%s?%s?NR==1?$10+$11+$12+$13?4096.0/1048576?yes", desc, pidfile[ii].c_str() );
698  stats.push_back( ss );
699  }
700 
701 
702  stats.push_back("CPUnode");
703  stats.push_back("IOWait"); if (opt_sys_iowait) { graphs.push_back("IOWait"); }
704  stats.push_back("Cached");
705  stats.push_back("Dirty");
706  stats.push_back("Free");
707 
708  if (opt_disk.size()) {
709  std::vector<std::string> tmp;
710  string_addto_vector( opt_disk, tmp, ',' );
711  for (std::vector<std::string>::iterator dk=tmp.begin(); dk!=tmp.end(); ++dk) {
712  // /proc/diskstat has 11 field after an initial 3 (14 total) for each device
713  // The 7th field after the device name (the 10th field total) is # of sectors written.
714  // Sectors appear to be 512 bytes. So, deviding by 2048 converts to MBs.
715  std::string statstr=*dk+"_wrMB/s?/proc/diskstats?/"+*dk+"/?$10?(1.0/2048)?yes";
716  stats.push_back(statstr);
717  std::vector<std::string> stat_spec;
718  string_addto_vector( statstr, stat_spec, '?' );
719  graphs.push_back( stat_spec[s_desc] );
720 
721  statstr=*dk+"_rdMB/s?/proc/diskstats?/"+*dk+"/?$6?(1.0/2048)?yes";
722  stats.push_back(statstr);
723  stat_spec.clear();
724  string_addto_vector( statstr, stat_spec, '?' );
725  //graphs.push_back( stat_spec[s_desc] ); // don't add read by default -- can be added with --graph
726  }
727  }
728 
729  if (opt_stats.size()) {
730  std::vector<std::string> tmp_stats;
731  string_addto_vector( opt_stats, tmp_stats, ',' );
732  for (std::vector<std::string>::iterator st=tmp_stats.begin();
733  st!=tmp_stats.end(); ++st) {
734  stats.push_back(*st);
735  std::vector<std::string> stat_spec;
736  string_addto_vector( *st, stat_spec, '?' );
737  graphs.push_back( stat_spec[s_desc] );
738  }
739  }
740 
741  std::vector<long> pre_vals;
742  std::vector<double> multipliers;
743  std::vector<std::vector<std::string>> spec2(stats.size());
744  std::vector<std::string> awkCmd;
745 
746  std::string header_str( "#" DATA_START "\n#_______time_______" );
747 
748  int outfd=open(dat_file_out.c_str(),O_WRONLY|O_CREAT,0777);
749  //FILE *outfp=stdout;
750  FILE *outfp = fdopen(outfd,"w");
751 
752  std::string cmd_comment("");
753  if (opt_cmd.size())
754  cmd_comment += "\\ncmd: "+opt_cmd[0];
755  if (opt_comment.size())
756  cmd_comment += "\\ncomment: " + opt_comment;
757  fprintf( outfp, GNUPLOT_PREFIX, opt_ymin, opt_ymax, opt_yincr, opt_y2max, opt_y2incr
758  , run_time, hostname.c_str(), ubuf.release
759  , cmd_comment.c_str()
760  , "disk write MB/s" );
761 
762  uint64_t t_start=gettimeofday_us();
763 
764  // build header string and get initial values for "rate" stats
765  bool first_graph_spec_added=false;
766  for (size_t ii=0; ii<stats.size(); ++ii) {
767  std::vector<std::string> stat_spec;
768  string_addto_vector( stats[ii], stat_spec, '?' );
769  if (stat_spec[s_desc]=="CPUnode" && stat_spec.size()==1)
770  // Ref. Documentation/filesystems/proc.txt: user+nice+system (skip idle) +iowait+irq+softirq+steal (skip guest)
771  stats[ii]+="?/proc/stat?/^cpu[^0-9]/?$2+$3+$4+$6+$7+$8+$9?1.0/"+std::to_string(concurentThreadsSupported)+"?yes";
772  else if (stat_spec[s_desc]=="IOWait" && stat_spec.size()==1)
773  stats[ii]+="?/proc/stat?/^cpu[^0-9]/?$6?1.0/"+std::to_string(concurentThreadsSupported)+"?yes";
774  else if (stat_spec[s_desc]=="Cached" && stat_spec.size()==1)
775  stats[ii]+="?/proc/meminfo?/^(Cached|Buffers):/?$2?1?no";
776  else if (stat_spec[s_desc]=="Dirty" && stat_spec.size()==1)
777  stats[ii]+="?/proc/meminfo?/^Dirty:/?$2?1?no";
778  else if (stat_spec[s_desc]=="Free" && stat_spec.size()==1)
779  stats[ii]+="?/proc/meminfo?/^MemFree:/?$2?1?no";
780 
781  header_str += " "+stat_spec[s_desc];
782 
783  string_addto_vector( stats[ii], spec2[ii], '?' );
784  char awk_cmd[1024];
785  snprintf( awk_cmd, sizeof(awk_cmd), "%s{vv+=%s}END{print vv}"
786  //snprintf( awk_cmd, sizeof(awk_cmd), "%s{vv+=%s;print \"vv now\",vv > \"/dev/stderr\";}END{print vv}"
787  , spec2[ii][s_linespec].c_str(), spec2[ii][s_fieldspec].c_str() );
788  awkCmd.push_back(awk_cmd);
789 
790  std::string stat=AWK( awkCmd.back(), spec2[ii][s_file].c_str(), NULL );
791 
792  pre_vals.push_back(atol(stat.c_str()));
793  multipliers.push_back(atof(AWK( "BEGIN{print "+spec2[ii][s_multiplier]+"}","/dev/null",NULL ).c_str()) );
794  //fprintf( stderr, " l=%s", spec2[ii][s_linespec].c_str() );
795  for (size_t jj=0; jj<graphs.size(); ++jj)
796  if (graphs[jj] == stat_spec[s_desc]) {
797  if (first_graph_spec_added) fprintf( outfp, ",\\\n '' " );
798  if (strncmp(stat_spec[s_desc].c_str(),"CPU",3)==0)
799  fprintf( outfp, "using 1:%zd title '%s' w linespoints axes x1y2", ii+2, stat_spec[s_desc].c_str() );
800  else if (stat_spec[s_desc] == "Cached" || stat_spec[s_desc] == "Dirty" || stat_spec[s_desc] == "Free")
801  fprintf( outfp, "using 1:($%zd/%s*100) title '%s%%' w linespoints axes x1y2", ii+2, memKB.c_str(), stat_spec[s_desc].c_str() );
802  else if (stat_spec[s_desc].substr(0,6) == "CPUcmd" || stat_spec[s_desc].substr(0,6) == "CPU+cm")
803  fprintf( outfp, "using 1:%zd title '%s' w linespoints axes x1y2", ii+2, stat_spec[s_desc].c_str() );
804  else if (stat_spec[s_desc].substr(0,12) == "WaitBlkIOcmd" )
805  fprintf( outfp, "using 1:%zd title '%s' w linespoints axes x1y2", ii+2, stat_spec[s_desc].c_str() );
806  else
807  fprintf( outfp, "using 1:%zd title '%s' w linespoints axes x1y1", ii+2, stat_spec[s_desc].c_str() );
808  first_graph_spec_added=true;
809  }
810  }
811  header_str += " #\n";
812 
813  fprintf( outfp, "\nif(png==0) pause -1 'Press Enter/Return or ^C to finish'\n\
814 exit\n" );
815 
816  // print the cmds
817  fprintf( outfp, "cmds:\n" );
818  for (size_t ii=0; ii<opt_cmd.size(); ++ii) {
819  std::string ss=opt_cmd[ii]+"\n";
820  fprintf( outfp, "%s", ss.c_str() );
821  }
822 
823  // print the specs
824  fprintf( outfp, "stats:\n" );
825  for (size_t ii=0; ii<stats.size(); ++ii) {
826  std::string ss=stats[ii]+"\n";
827  fprintf( outfp, "%s", ss.c_str() );
828  }
829 
830  // now print header
831  fprintf( outfp, "%s", header_str.c_str() ); fflush( outfp );
832 
833  std::string tmpdbg("main lp=%d done stat%zd=%ld rate=%f ");
834  //char tmpdbgbuf[128];
835  char proc_stats[8192];
836  char *awk_in;
837  int lp;
838 
839  // - - - - - - - - - - - - - - - - - - - - - - - -
840  // wait a period and then start collecting the stats
841  eintr1:
842  int64_t t_sleep=(t_start+(uint64_t)(period*1e6))-gettimeofday_us();
843  if (t_sleep > 0) {
844  int sts=usleep( t_sleep );
845  TRACE( 3,"main usleep sts=%d errno=%d",sts, errno );
846  if(errno == EINTR)
847  goto eintr1;
848  }
849 
850 # define MAX_LP 600
851  for (lp=2; lp<MAX_LP; ++lp) {
852  char str[80];
853  gettimeofday( &tv, NULL );
854  strftime( str, sizeof(str), "%FT%T", localtime(&tv.tv_sec) );
855  //fprintf(outfp, "%s.%ld", str, tv.tv_usec/100000 );
856  fprintf(outfp, "%s", str );
857  std::string prv_file("");
858  for (size_t ii=0; ii<stats.size(); ++ii) {
859  TRACE( 3, "main lp=%d start stat%zd", lp, ii );
860  char const *awk_file;
861  if (ii < (2*opt_cmd.size())) { // For each cmd, the
862  // /proc/<pid>/stat file
863  // will be referenced twice.
864  if ((ii&1)==0) {
865  read_proc_file( pidfile[ii/2].c_str(),proc_stats, sizeof(proc_stats) );
866  }
867  awk_in=proc_stats; awk_file=NULL;
868  } else if (spec2[ii][s_file] != prv_file) {
869  prv_file = spec2[ii][s_file];
870  read_proc_file( spec2[ii][s_file].c_str(), proc_stats, sizeof(proc_stats) );
871  awk_in=proc_stats; awk_file=NULL;
872  }
873 
874 
875  std::string stat_str=AWK( awkCmd[ii], awk_file, awk_in );
876 
877  long stat=atol(stat_str.c_str());
878 
879  if (spec2[ii][s_rate] == "yes") {
880  double rate;
881  if (stat_str!="\n")
882  rate=(stat-pre_vals[ii])*multipliers[ii]/period;
883  else
884  rate=0.0;
885  TRACE( 3, tmpdbg+"stat_str[0]=0x%x stat_str.size()=%zd", lp,ii,stat,rate, stat_str[0], stat_str.size() );
886  fprintf(outfp, " %.2f",rate );
887  if (rate < 0.0 && spec2[ii][s_file] == "/proc/diskstats") {
888  TRACE( 0, "main stat:"+spec2[ii][s_desc]+" rate=%f pre_val=%ld stat=%ld stat_str=\""+stat_str\
889  +"\" awkCmd="+awkCmd[ii]+" proc_diskstats="+proc_stats
890  , rate, pre_vals[ii], stat );
891  //TRACE_CNTL( "modeM", 0L );
892  }
893  pre_vals[ii] = stat;
894  } else {
895  TRACE( 3, "main lp=%d done stat%zd=%ld", lp, ii, stat );
896  fprintf(outfp, " %.2f",stat*multipliers[ii] );
897  }
898  }
899  fprintf(outfp,"\n"); fflush(outfp);
900  eintr2:
901  int64_t t_sleep=(t_start+(uint64_t)(period*lp*1000000))-gettimeofday_us();
902  if (t_sleep > 0) {
903  int sts=usleep( t_sleep );
904  TRACE( 3,"main usleep sts=%d errno=%d",sts,errno );
905  if(errno == EINTR)
906  goto eintr2;
907  }
908  pp = check_pid_vec();
909  TRACE( 2, "main pp=%d t_sleep=%ld", pp, t_sleep );
910  if (pp == -1) {
911  if (post_periods_completed == 0)
912  TRACE( 1, "main processes complete - waiting %d post periods", opt_post );
913  if (post_periods_completed++ == opt_post)
914  break;
915  }
916  }
917  if (lp==MAX_LP) {
918  fprintf(outfp,"# MAX_LP abort\n" );
919  }
920 
921  //TRACE( 0, "main waiting for pid=%d", pid );
922  //wait(&status);
923  //TRACE( 0, "main status=%d",status );
924  TRACE( 0, "main done/complete/returning" );
925  //TRACE_CNTL( "modeM", 0L );
926  return (0);
927 } // main