otsdaq  v2_04_02
ARTDAQSupervisor.cc
1 
2 #include "otsdaq/ARTDAQSupervisor/ARTDAQSupervisor.hh"
3 
4 #include "artdaq-core/Utilities/configureMessageFacility.hh"
5 #include "artdaq/BuildInfo/GetPackageBuildInfo.hh"
6 #include "artdaq/DAQdata/Globals.hh"
7 #include "cetlib_except/exception.h"
8 #include "fhiclcpp/make_ParameterSet.h"
9 
10 //#include "otsdaq/TablePlugins/ARTDAQBoardReaderTable.h"
11 //#include "otsdaq/TablePlugins/ARTDAQBuilderTable.h"
12 //#include "otsdaq/TablePlugins/ARTDAQDataLoggerTable.h"
13 //#include "otsdaq/TablePlugins/ARTDAQDispatcherTable.h"
14 
15 #include <boost/exception/all.hpp>
16 #include <boost/filesystem.hpp>
17 
18 #include <signal.h>
19 
20 using namespace ots;
21 
22 XDAQ_INSTANTIATOR_IMPL(ARTDAQSupervisor)
23 
24 #define ARTDAQ_FCL_PATH std::string(__ENV__("USER_DATA")) + "/" + "ARTDAQConfigurations/"
25 #define FAKE_CONFIG_NAME "ots_config"
26 #define DAQINTERFACE_PORT \
27  std::atoi(__ENV__("ARTDAQ_BASE_PORT")) + \
28  (partition_ * std::atoi(__ENV__("ARTDAQ_PORTS_PER_PARTITION")))
29 
30 static ARTDAQSupervisor* instance = nullptr;
31 static std::unordered_map<int, struct sigaction> old_actions =
32  std::unordered_map<int, struct sigaction>();
33 static bool sighandler_init = false;
34 static void signal_handler(int signum)
35 {
36  // Messagefacility may already be gone at this point, TRACE ONLY!
37  TRACE_STREAMER(TLVL_ERROR, &("ARTDAQsupervisor")[0], 0, 0, 0)
38  << "A signal of type " << signum
39  << " was caught by ARTDAQSupervisor. Shutting down DAQInterface, "
40  "then proceeding with default handlers!";
41 
42  if(instance)
43  instance->destroy();
44 
45  sigset_t set;
46  pthread_sigmask(SIG_UNBLOCK, NULL, &set);
47  pthread_sigmask(SIG_UNBLOCK, &set, NULL);
48 
49  TRACE_STREAMER(TLVL_ERROR, &("SharedMemoryManager")[0], 0, 0, 0)
50  << "Calling default signal handler";
51  if(signum != SIGUSR2)
52  {
53  sigaction(signum, &old_actions[signum], NULL);
54  kill(getpid(), signum); // Only send signal to self
55  }
56  else
57  {
58  // Send Interrupt signal if parsing SIGUSR2 (i.e. user-defined exception that
59  // should tear down ARTDAQ)
60  sigaction(SIGINT, &old_actions[SIGINT], NULL);
61  kill(getpid(), SIGINT); // Only send signal to self
62  }
63 }
64 
65 static void init_sighandler(ARTDAQSupervisor* inst)
66 {
67  static std::mutex sighandler_mutex;
68  std::unique_lock<std::mutex> lk(sighandler_mutex);
69 
70  if(!sighandler_init)
71  {
72  sighandler_init = true;
73  instance = inst;
74  std::vector<int> signals = {
75  SIGINT,
76  SIGILL,
77  SIGABRT,
78  SIGFPE,
79  SIGSEGV,
80  SIGPIPE,
81  SIGALRM,
82  SIGTERM,
83  SIGUSR2,
84  SIGHUP}; // SIGQUIT is used by art in normal operation
85  for(auto signal : signals)
86  {
87  struct sigaction old_action;
88  sigaction(signal, NULL, &old_action);
89 
90  // If the old handler wasn't SIG_IGN (it's a handler that just
91  // "ignore" the signal)
92  if(old_action.sa_handler != SIG_IGN)
93  {
94  struct sigaction action;
95  action.sa_handler = signal_handler;
96  sigemptyset(&action.sa_mask);
97  for(auto sigblk : signals)
98  {
99  sigaddset(&action.sa_mask, sigblk);
100  }
101  action.sa_flags = 0;
102 
103  // Replace the signal handler of SIGINT with the one described by
104  // new_action
105  sigaction(signal, &action, NULL);
106  old_actions[signal] = old_action;
107  }
108  }
109  }
110 }
111 
112 //========================================================================================================================
113 ARTDAQSupervisor::ARTDAQSupervisor(xdaq::ApplicationStub* stub)
114  : CoreSupervisorBase(stub)
115  , daqinterface_ptr_(NULL)
116  , partition_(getSupervisorProperty("partition", 0))
117  , daqinterface_state_("notrunning")
118  , runner_thread_(nullptr)
119 {
120  __SUP_COUT__ << "Constructor." << __E__;
121 
122  INIT_MF("ARTDAQSupervisor");
123  init_sighandler(this);
124 
125  // Write out settings file
126  auto settings_file = __ENV__("DAQINTERFACE_SETTINGS");
127  std::ofstream o(settings_file, std::ios::trunc);
128 
129  o << "log_directory: "
130  << getSupervisorProperty("log_directory", std::string(__ENV__("OTSDAQ_LOG_DIR")))
131  << std::endl;
132  o << "record_directory: "
133  << getSupervisorProperty("record_directory", ARTDAQ_FCL_PATH) << std::endl;
134  o << "package_hashes_to_save: "
135  << getSupervisorProperty("package_hashes_to_save", "[artdaq]") << std::endl;
136  // Note that productsdir_for_bash_scripts is REQUIRED!
137  o << "productsdir_for_bash_scripts: "
138  << getSupervisorProperty("productsdir_for_bash_scripts",
139  std::string(__ENV__("OTS_PRODUCTS")))
140  << std::endl;
141  o << "boardreader timeout: " << getSupervisorProperty("boardreader_timeout", 30)
142  << std::endl;
143  o << "eventbuilder timeout: " << getSupervisorProperty("eventbuilder_timeout", 30)
144  << std::endl;
145  o << "datalogger timeout: " << getSupervisorProperty("datalogger_timeout", 30)
146  << std::endl;
147  o << "dispatcher timeout: " << getSupervisorProperty("dispatcher_timeout", 30)
148  << std::endl;
149  o << "max_fragment_size_bytes: "
150  << getSupervisorProperty("max_fragment_size_bytes", 1048576) << std::endl;
151  o << "transfer_plugin_to_use: "
152  << getSupervisorProperty("transfer_plugin_to_use", "Autodetect") << std::endl;
153  o << "all_events_to_all_dispatchers: " << std::boolalpha
154  << getSupervisorProperty("all_events_to_all_dispatchers", true) << std::endl;
155  o << "data_directory_override: "
156  << getSupervisorProperty("data_directory_override",
157  std::string(__ENV__("ARTDAQ_OUTPUT_DIR")))
158  << std::endl;
159  o << "max_configurations_to_list: "
160  << getSupervisorProperty("max_configurations_to_list", 10) << std::endl;
161  o << "disable_unique_rootfile_labels: "
162  << getSupervisorProperty("disable_unique_rootfile_labels", false) << std::endl;
163  o << "use_messageviewer: " << std::boolalpha
164  << getSupervisorProperty("use_messageviewer", false) << std::endl;
165  o << "fake_messagefacility: " << std::boolalpha
166  << getSupervisorProperty("fake_messagefacility", false) << std::endl;
167  o << "advanced_memory_usage: " << std::boolalpha
168  << getSupervisorProperty("advanced_memory_usage", false) << std::endl;
169 
170  o.close();
171  __SUP_COUT__ << "Constructed." << __E__;
172 } // end constructor()
173 
174 //========================================================================================================================
175 ARTDAQSupervisor::~ARTDAQSupervisor(void)
176 {
177  __SUP_COUT__ << "Destructor." << __E__;
178  destroy();
179  __SUP_COUT__ << "Destructed." << __E__;
180 } // end destructor()
181 
182 //========================================================================================================================
183 void ARTDAQSupervisor::init(void)
184 {
185  stop_runner_();
186 
187  __SUP_COUT__ << "Initializing..." << __E__;
188  {
189  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
190 
191  // allSupervisorInfo_.init(getApplicationContext());
192  artdaq::configureMessageFacility("ARTDAQSupervisor");
193  __SUP_COUT__ << "artdaq MF configured." << __E__;
194 
195  // initialization
196  char* daqinterface_dir = getenv("ARTDAQ_DAQINTERFACE_DIR");
197  if(daqinterface_dir == NULL)
198  {
199  __SS__ << "ARTDAQ_DAQINTERFACE_DIR environment variable not set! This "
200  "means that DAQInterface has not been setup!"
201  << __E__;
202  __SUP_SS_THROW__;
203  }
204  else
205  {
206  __SUP_COUT__ << "Initializing Python" << __E__;
207  Py_Initialize();
208 
209  __SUP_COUT__ << "Adding DAQInterface directory to PYTHON_PATH" << __E__;
210  PyObject* sysPath = PySys_GetObject((char*)"path");
211  PyObject* programName = PyString_FromString(daqinterface_dir);
212  PyList_Append(sysPath, programName);
213  Py_DECREF(programName);
214 
215  __SUP_COUT__ << "Creating Module name" << __E__;
216  PyObject* pName = PyString_FromString("rc.control.daqinterface");
217  /* Error checking of pName left out */
218 
219  __SUP_COUT__ << "Importing module" << __E__;
220  PyObject* pModule = PyImport_Import(pName);
221  Py_DECREF(pName);
222 
223  if(pModule == NULL)
224  {
225  PyErr_Print();
226  __SS__ << "Failed to load rc.control.daqinterface" << __E__;
227  __SUP_SS_THROW__;
228  }
229  else
230  {
231  __SUP_COUT__ << "Loading python module dictionary" << __E__;
232  PyObject* pDict = PyModule_GetDict(pModule);
233  if(pDict == NULL)
234  {
235  PyErr_Print();
236  __SS__ << "Unable to load module dictionary" << __E__;
237  __SUP_SS_THROW__;
238  }
239  else
240  {
241  Py_DECREF(pModule);
242 
243  __SUP_COUT__ << "Getting DAQInterface object pointer" << __E__;
244  PyObject* di_obj_ptr = PyDict_GetItemString(pDict, "DAQInterface");
245 
246  __SUP_COUT__ << "Filling out DAQInterface args struct" << __E__;
247  PyObject* pArgs = PyTuple_New(0);
248 
249  PyObject* kwargs = Py_BuildValue("{s:s, s:s, s:i, s:i, s:s, s:s}",
250  "logpath",
251  ".daqint.log",
252  "name",
253  "DAQInterface",
254  "partition_number",
255  partition_,
256  "rpc_port",
257  DAQINTERFACE_PORT,
258  "rpc_host",
259  "localhost",
260  "control_host",
261  "localhost");
262 
263  __SUP_COUT__ << "Calling DAQInterface Object Constructor" << __E__;
264  daqinterface_ptr_ = PyObject_Call(di_obj_ptr, pArgs, kwargs);
265 
266  Py_DECREF(di_obj_ptr);
267  }
268  }
269  }
270 
271  getDAQState_();
272  }
273  start_runner_();
274  __SUP_COUT__ << "Initialized." << __E__;
275 } // end init()
276 
277 //========================================================================================================================
278 void ARTDAQSupervisor::destroy(void)
279 {
280  __SUP_COUT__ << "Destroying..." << __E__;
281 
282  if(daqinterface_ptr_ != NULL)
283  {
284  __SUP_COUT__ << "Calling recover transition" << __E__;
285  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
286  PyObject* pName = PyString_FromString("do_recover");
287  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
288 
289  __SUP_COUT__ << "Making sure that correct state has been reached" << __E__;
290  getDAQState_();
291  while(daqinterface_state_ != "stopped")
292  {
293  getDAQState_();
294  __SUP_COUT__ << "State is " << daqinterface_state_
295  << ", waiting 1s and retrying..." << __E__;
296  usleep(1000000);
297  }
298 
299  Py_XDECREF(daqinterface_ptr_);
300  daqinterface_ptr_ = NULL;
301  }
302 
303  Py_Finalize();
304  __SUP_COUT__ << "Destroyed." << __E__;
305 } // end destroy()
306 
307 //========================================================================================================================
308 void ARTDAQSupervisor::transitionConfiguring(toolbox::Event::Reference event)
309 {
310  __SUP_COUT__ << "transitionConfiguring" << __E__;
311 
312  // activate the configuration tree (the first iteration)
313  if(RunControlStateMachine::getIterationIndex() == 0 &&
314  RunControlStateMachine::getSubIterationIndex() == 0)
315  {
316  std::pair<std::string /*group name*/, TableGroupKey> theGroup(
317  SOAPUtilities::translate(theStateMachine_.getCurrentMessage())
318  .getParameters()
319  .getValue("ConfigurationTableGroupName"),
320  TableGroupKey(SOAPUtilities::translate(theStateMachine_.getCurrentMessage())
321  .getParameters()
322  .getValue("ConfigurationTableGroupKey")));
323 
324  __SUP_COUT__ << "Configuration table group name: " << theGroup.first
325  << " key: " << theGroup.second << __E__;
326 
327  theConfigurationManager_->loadTableGroup(
328  theGroup.first, theGroup.second, true /*doActivate*/);
329 
330  // start configuring thread
331  std::thread([](ARTDAQSupervisor* as) { ARTDAQSupervisor::configuringThread(as); },
332  this)
333  .detach();
334 
335  __SUP_COUT__ << "Configuring thread started." << __E__;
336 
337  RunControlStateMachine::indicateSubIterationWork();
338  }
339  else // not first time
340  {
341  std::string errorMessage = theStateMachine_.getErrorMessage();
342  __SUP_COUTV__(errorMessage);
343  __SUP_COUTV__(theProgressBar_.read());
344  __SUP_COUTV__(theProgressBar_.isComplete());
345 
346  // check for done and error messages
347 
348  if(errorMessage != "")
349  {
350  __SUP_SS__ << "Error was caught in configuring thread: " << errorMessage
351  << __E__;
352  __SUP_COUT_ERR__ << "\n" << ss.str();
353 
354  theStateMachine_.setErrorMessage(ss.str());
355  throw toolbox::fsm::exception::Exception(
356  "Transition Error" /*name*/,
357  ss.str() /* message*/,
358  "CoreSupervisorBase::transitionConfiguring" /*module*/,
359  __LINE__ /*line*/,
360  __FUNCTION__ /*function*/
361  );
362  }
363 
364  if(!theProgressBar_.isComplete())
365  {
366  RunControlStateMachine::indicateSubIterationWork();
367  sleep(1 /*seconds*/);
368  }
369  else
370  __SUP_COUT_INFO__ << "Complete configuring transition!" << __E__;
371  }
372 
373  return;
374 } // end transitionConfiguring()
375 
376 //========================================================================================================================
377 void ARTDAQSupervisor::configuringThread(ARTDAQSupervisor* theArtdaqSupervisor) try
378 {
379  ProgressBar& progressBar = theArtdaqSupervisor->theProgressBar_;
380 
381  const std::string& uid =
382  theArtdaqSupervisor->theConfigurationManager_
383  ->getNode(
384  ConfigurationManager::XDAQ_APPLICATION_TABLE_NAME + "/" +
385  theArtdaqSupervisor->CorePropertySupervisorBase::getSupervisorUID() +
386  "/" + "LinkToSupervisorTable")
387  .getValueAsString();
388 
389  __COUT__ << "Supervisor uid is " << uid << ", getting supervisor table node" << __E__;
390 
391  const std::string mfSubject_ =
392  theArtdaqSupervisor->supervisorClassNoNamespace_ + "-" + uid;
393 
394  ConfigurationTree theSupervisorNode = theArtdaqSupervisor->getSupervisorTableNode();
395 
396  std::map<int /*subsystem ID*/, ARTDAQTableBase::SubsystemInfo> subsystems;
397  std::map<ARTDAQTableBase::ARTDAQAppType, std::list<ARTDAQTableBase::ProcessInfo>> processes;
398 
399  progressBar.step();
400 
401  ARTDAQTableBase::extractArtdaqInfo(
402  theSupervisorNode,
403  subsystems,
404  processes,
405  true /*doWriteFHiCL*/,
406  theArtdaqSupervisor->CorePropertySupervisorBase::getSupervisorProperty<size_t>(
407  "max_fragment_size_bytes", ARTDAQTableBase::DEFAULT_MAX_FRAGMENT_SIZE),
408  &progressBar);
409 
410  std::list<ARTDAQTableBase::ProcessInfo>& readerInfo =
411  processes[ARTDAQTableBase::ARTDAQAppType::BoardReader];
412  std::list<ARTDAQTableBase::ProcessInfo>& builderInfo =
413  processes[ARTDAQTableBase::ARTDAQAppType::EventBuilder];
414  std::list<ARTDAQTableBase::ProcessInfo>& loggerInfo =
415  processes[ARTDAQTableBase::ARTDAQAppType::DataLogger];
416  std::list<ARTDAQTableBase::ProcessInfo>& dispatcherInfo =
417  processes[ARTDAQTableBase::ARTDAQAppType::Dispatcher];
418 
419  // Check lists
420  if(readerInfo.size() == 0)
421  {
422  __GEN_SS__ << "There must be at least one enabled BoardReader!" << __E__;
423  __GEN_SS_THROW__;
424  return;
425  }
426  if(builderInfo.size() == 0)
427  {
428  __GEN_SS__ << "There must be at least one enabled EventBuilder!" << __E__;
429  __GEN_SS_THROW__;
430  return;
431  }
432 
433  progressBar.step();
434 
435  __GEN_COUT__ << "Writing boot.txt" << __E__;
436 
437  int debugLevel = theSupervisorNode.getNode("DAQInterfaceDebugLevel").getValue<int>();
438  std::string setupScript = theSupervisorNode.getNode("DAQSetupScript").getValue();
439 
440  std::ofstream o(ARTDAQ_FCL_PATH + "/boot.txt", std::ios::trunc);
441  o << "DAQ setup script: " << setupScript << std::endl;
442  o << "debug level: " << debugLevel << std::endl;
443  o << std::endl;
444 
445  if(subsystems.size() > 1)
446  {
447  for(auto& ss : subsystems)
448  {
449  if(ss.first == 0)
450  continue;
451  o << "Subsystem id: " << ss.first << std::endl;
452  if(ss.second.destination != 0)
453  {
454  o << "Subsystem destination: " << ss.second.destination << std::endl;
455  }
456  for(auto& sss : ss.second.sources)
457  {
458  o << "Subsystem source: " << sss << std::endl;
459  }
460  o << std::endl;
461  }
462  }
463 
464  for(auto& builder : builderInfo)
465  {
466  o << "EventBuilder host: " << builder.hostname << std::endl;
467  o << "EventBuilder label: " << builder.label << std::endl;
468  if(builder.subsystem != 1)
469  {
470  o << "EventBuilder subsystem: " << builder.subsystem << std::endl;
471  }
472  o << std::endl;
473  }
474  for(auto& logger : loggerInfo)
475  {
476  o << "DataLogger host: " << logger.hostname << std::endl;
477  o << "DataLogger label: " << logger.label << std::endl;
478  if(logger.subsystem != 1)
479  {
480  o << "DataLogger subsystem: " << logger.subsystem << std::endl;
481  }
482  o << std::endl;
483  }
484  for(auto& dispatcher : dispatcherInfo)
485  {
486  o << "Dispatcher host: " << dispatcher.hostname << std::endl;
487  o << "Dispatcher label: " << dispatcher.label << std::endl;
488  if(dispatcher.subsystem != 1)
489  {
490  o << "Dispatcher subsystem: " << dispatcher.subsystem << std::endl;
491  }
492  o << std::endl;
493  }
494  o.close();
495 
496  progressBar.step();
497 
498  __GEN_COUT__ << "Building configuration directory" << __E__;
499 
500  boost::system::error_code ignored;
501  boost::filesystem::remove_all(ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME, ignored);
502  mkdir((ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME).c_str(), 0755);
503 
504  for(auto& reader : readerInfo)
505  {
506  symlink(
507  (ARTDAQ_FCL_PATH + "boardReader-" + reader.label + ".fcl").c_str(),
508  (ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME + "/" + reader.label + ".fcl").c_str());
509  }
510  for(auto& builder : builderInfo)
511  {
512  symlink(
513  (ARTDAQ_FCL_PATH + "builder-" + builder.label + ".fcl").c_str(),
514  (ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME + "/" + builder.label + ".fcl").c_str());
515  }
516  for(auto& logger : loggerInfo)
517  {
518  symlink(
519  (ARTDAQ_FCL_PATH + "datalogger-" + logger.label + ".fcl").c_str(),
520  (ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME + "/" + logger.label + ".fcl").c_str());
521  }
522  for(auto& dispatcher : dispatcherInfo)
523  {
524  symlink((ARTDAQ_FCL_PATH + "dispatcher-" + dispatcher.label + ".fcl").c_str(),
525  (ARTDAQ_FCL_PATH + FAKE_CONFIG_NAME + "/" + dispatcher.label + ".fcl")
526  .c_str());
527  }
528 
529  progressBar.step();
530 
531  std::lock_guard<std::mutex> lk(theArtdaqSupervisor->daqinterface_mutex_);
532  theArtdaqSupervisor->getDAQState_();
533  if(theArtdaqSupervisor->daqinterface_state_ != "stopped" &&
534  theArtdaqSupervisor->daqinterface_state_ != "")
535  {
536  __GEN_SS__ << "Cannot configure DAQInterface because it is in the wrong state"
537  << " (" << theArtdaqSupervisor->daqinterface_state_ << " != stopped)!"
538  << __E__;
539  __GEN_SS_THROW__
540  }
541 
542  __GEN_COUT__ << "Calling setdaqcomps" << __E__;
543  __GEN_COUT__ << "Status before setdaqcomps: "
544  << theArtdaqSupervisor->daqinterface_state_ << __E__;
545  PyObject* pName1 = PyString_FromString("setdaqcomps");
546 
547  PyObject* readerDict = PyDict_New();
548  for(auto& reader : readerInfo)
549  {
550  PyObject* readerName = PyString_FromString(reader.label.c_str());
551 
552  PyObject* readerData = PyList_New(3);
553  PyObject* readerHost = PyString_FromString(reader.hostname.c_str());
554  PyObject* readerPort = PyString_FromString("-1");
555  PyObject* readerSubsystem =
556  PyString_FromString(std::to_string(reader.subsystem).c_str());
557  PyList_SetItem(readerData, 0, readerHost);
558  PyList_SetItem(readerData, 1, readerPort);
559  PyList_SetItem(readerData, 2, readerSubsystem);
560  PyDict_SetItem(readerDict, readerName, readerData);
561  }
562  PyObject* res1 = PyObject_CallMethodObjArgs(
563  theArtdaqSupervisor->daqinterface_ptr_, pName1, readerDict, NULL);
564  Py_DECREF(readerDict);
565 
566  if(res1 == NULL)
567  {
568  PyErr_Print();
569  __GEN_SS__ << "Error calling setdaqcomps transition" << __E__;
570  __GEN_SS_THROW__;
571  }
572  theArtdaqSupervisor->getDAQState_();
573  __GEN_COUT__ << "Status after setdaqcomps: "
574  << theArtdaqSupervisor->daqinterface_state_ << __E__;
575 
576  progressBar.step();
577  __GEN_COUT__ << "Calling do_boot" << __E__;
578  __GEN_COUT__ << "Status before boot: " << theArtdaqSupervisor->daqinterface_state_
579  << __E__;
580  PyObject* pName2 = PyString_FromString("do_boot");
581  PyObject* pStateArgs1 = PyString_FromString((ARTDAQ_FCL_PATH + "/boot.txt").c_str());
582  PyObject* res2 = PyObject_CallMethodObjArgs(
583  theArtdaqSupervisor->daqinterface_ptr_, pName2, pStateArgs1, NULL);
584 
585  if(res2 == NULL)
586  {
587  PyErr_Print();
588  __GEN_SS__ << "Error calling boot transition" << __E__;
589  __GEN_SS_THROW__;
590  }
591 
592  theArtdaqSupervisor->getDAQState_();
593  if(theArtdaqSupervisor->daqinterface_state_ != "booted")
594  {
595  __GEN_SS__ << "DAQInterface boot transition failed!" << __E__;
596  __GEN_SS_THROW__
597  }
598  __GEN_COUT__ << "Status after boot: " << theArtdaqSupervisor->daqinterface_state_
599  << __E__;
600 
601  progressBar.step();
602  __GEN_COUT__ << "Calling do_config" << __E__;
603  __GEN_COUT__ << "Status before config: " << theArtdaqSupervisor->daqinterface_state_
604  << __E__;
605  PyObject* pName3 = PyString_FromString("do_config");
606  PyObject* pStateArgs2 = Py_BuildValue("[s]", FAKE_CONFIG_NAME);
607  PyObject* res3 = PyObject_CallMethodObjArgs(
608  theArtdaqSupervisor->daqinterface_ptr_, pName3, pStateArgs2, NULL);
609 
610  if(res3 == NULL)
611  {
612  PyErr_Print();
613  __GEN_SS__ << "Error calling config transition" << __E__;
614  __GEN_SS_THROW__;
615  }
616  theArtdaqSupervisor->getDAQState_();
617  if(theArtdaqSupervisor->daqinterface_state_ != "ready")
618  {
619  __GEN_SS__ << "DAQInterface config transition failed!" << __E__;
620  __GEN_SS_THROW__;
621  }
622  __GEN_COUT__ << "Status after config: " << theArtdaqSupervisor->daqinterface_state_
623  << __E__;
624  progressBar.complete();
625  __GEN_COUT__ << "Configured." << __E__;
626 
627 } // end configuringThread()
628 catch(const std::runtime_error& e)
629 {
630  __SS__ << "Error was caught while configuring: " << e.what() << __E__;
631  __COUT_ERR__ << "\n" << ss.str();
632  theArtdaqSupervisor->theStateMachine_.setErrorMessage(ss.str());
633 }
634 catch(...)
635 {
636  __SS__ << "Unknown error was caught while configuring. Please checked the logs."
637  << __E__;
638  __COUT_ERR__ << "\n" << ss.str();
639  theArtdaqSupervisor->theStateMachine_.setErrorMessage(ss.str());
640 } // end configuringThread() error handling
641 
642 //========================================================================================================================
643 void ARTDAQSupervisor::transitionHalting(toolbox::Event::Reference event)
644 try
645 {
646  __SUP_COUT__ << "Halting..." << __E__;
647  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
648  getDAQState_();
649  __SUP_COUT__ << "Status before halt: " << daqinterface_state_ << __E__;
650 
651  PyObject* pName = PyString_FromString("do_command");
652  PyObject* pArg = PyString_FromString("Shutdown");
653  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
654 
655  if(res == NULL)
656  {
657  PyErr_Print();
658  __SS__ << "Error calling Shutdown transition" << __E__;
659  __SUP_SS_THROW__;
660  }
661 
662  getDAQState_();
663  __SUP_COUT__ << "Status after halt: " << daqinterface_state_ << __E__;
664  __SUP_COUT__ << "Halted." << __E__;
665 } // end transitionHalting()
666 catch(const std::runtime_error& e)
667 {
668  const std::string transitionName = "Halting";
669  // if halting from Failed state, then ignore errors
670  if(theStateMachine_.getProvenanceStateName() ==
671  RunControlStateMachine::FAILED_STATE_NAME ||
672  theStateMachine_.getProvenanceStateName() ==
673  RunControlStateMachine::HALTED_STATE_NAME)
674  {
675  __SUP_COUT_INFO__ << "Error was caught while halting (but ignoring because "
676  "previous state was '"
677  << RunControlStateMachine::FAILED_STATE_NAME
678  << "'): " << e.what() << __E__;
679  }
680  else // if not previously in Failed state, then fail
681  {
682  __SUP_SS__ << "Error was caught while " << transitionName << ": " << e.what()
683  << __E__;
684  __SUP_COUT_ERR__ << "\n" << ss.str();
685  theStateMachine_.setErrorMessage(ss.str());
686  throw toolbox::fsm::exception::Exception(
687  "Transition Error" /*name*/,
688  ss.str() /* message*/,
689  "ARTDAQSupervisorBase::transition" + transitionName /*module*/,
690  __LINE__ /*line*/,
691  __FUNCTION__ /*function*/
692  );
693  }
694 } // end transitionHalting() std::runtime_error exception handling
695 catch(...)
696 {
697  const std::string transitionName = "Halting";
698  // if halting from Failed state, then ignore errors
699  if(theStateMachine_.getProvenanceStateName() ==
700  RunControlStateMachine::FAILED_STATE_NAME ||
701  theStateMachine_.getProvenanceStateName() ==
702  RunControlStateMachine::HALTED_STATE_NAME)
703  {
704  __SUP_COUT_INFO__ << "Unknown error was caught while halting (but ignoring "
705  "because previous state was '"
706  << RunControlStateMachine::FAILED_STATE_NAME << "')."
707  << __E__;
708  }
709  else // if not previously in Failed state, then fail
710  {
711  __SUP_SS__ << "Unknown error was caught while " << transitionName
712  << ". Please checked the logs." << __E__;
713  __SUP_COUT_ERR__ << "\n" << ss.str();
714  theStateMachine_.setErrorMessage(ss.str());
715  throw toolbox::fsm::exception::Exception(
716  "Transition Error" /*name*/,
717  ss.str() /* message*/,
718  "ARTDAQSupervisorBase::transition" + transitionName /*module*/,
719  __LINE__ /*line*/,
720  __FUNCTION__ /*function*/
721  );
722  }
723 } // end transitionHalting() exception handling
724 
725 //========================================================================================================================
726 void ARTDAQSupervisor::transitionInitializing(toolbox::Event::Reference event)
727 {
728  __SUP_COUT__ << "Initializing..." << __E__;
729  init();
730  __SUP_COUT__ << "Initialized." << __E__;
731 } // end transitionInitializing()
732 
733 //========================================================================================================================
734 void ARTDAQSupervisor::transitionPausing(toolbox::Event::Reference event)
735 {
736  __SUP_COUT__ << "Pausing..." << __E__;
737  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
738 
739  getDAQState_();
740  __SUP_COUT__ << "Status before pause: " << daqinterface_state_ << __E__;
741 
742  PyObject* pName = PyString_FromString("do_command");
743  PyObject* pArg = PyString_FromString("Pause");
744  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
745 
746  if(res == NULL)
747  {
748  PyErr_Print();
749  __SS__ << "Error calling Pause transition" << __E__;
750  __SUP_SS_THROW__;
751  }
752 
753  getDAQState_();
754  __SUP_COUT__ << "Status after pause: " << daqinterface_state_ << __E__;
755 
756  __SUP_COUT__ << "Paused." << __E__;
757 } // end transitionPausing()
758 
759 //========================================================================================================================
760 void ARTDAQSupervisor::transitionResuming(toolbox::Event::Reference event)
761 {
762  __SUP_COUT__ << "Resuming..." << __E__;
763  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
764 
765  getDAQState_();
766  __SUP_COUT__ << "Status before resume: " << daqinterface_state_ << __E__;
767  PyObject* pName = PyString_FromString("do_command");
768  PyObject* pArg = PyString_FromString("Resume");
769  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
770 
771  if(res == NULL)
772  {
773  PyErr_Print();
774  __SS__ << "Error calling Resume transition" << __E__;
775  __SUP_SS_THROW__;
776  }
777  getDAQState_();
778  __SUP_COUT__ << "Status after resume: " << daqinterface_state_ << __E__;
779  __SUP_COUT__ << "Resumed." << __E__;
780 } // end transitionResuming()
781 
782 //========================================================================================================================
783 void ARTDAQSupervisor::transitionStarting(toolbox::Event::Reference event)
784 {
785  __SUP_COUT__ << "Starting..." << __E__;
786  {
787  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
788  getDAQState_();
789  __SUP_COUT__ << "Status before start: " << daqinterface_state_ << __E__;
790  auto runNumber = SOAPUtilities::translate(theStateMachine_.getCurrentMessage())
791  .getParameters()
792  .getValue("RunNumber");
793 
794  PyObject* pName = PyString_FromString("do_start_running");
795  int run_number = std::stoi(runNumber);
796  PyObject* pStateArgs = PyInt_FromLong(run_number);
797  PyObject* res =
798  PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pStateArgs, NULL);
799 
800  if(res == NULL)
801  {
802  PyErr_Print();
803  __SS__ << "Error calling start transition" << __E__;
804  __SUP_SS_THROW__;
805  }
806  getDAQState_();
807  __SUP_COUT__ << "Status after start: " << daqinterface_state_ << __E__;
808  if(daqinterface_state_ != "running")
809  {
810  __SS__ << "DAQInterface start transition failed!" << __E__;
811  __SUP_SS_THROW__;
812  }
813  }
814  start_runner_();
815  __SUP_COUT__ << "Started." << __E__;
816 } // end transitionStarting()
817 
818 //========================================================================================================================
819 void ARTDAQSupervisor::transitionStopping(toolbox::Event::Reference event)
820 {
821  __SUP_COUT__ << "Stopping..." << __E__;
822  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
823  getDAQState_();
824  __SUP_COUT__ << "Status before stop: " << daqinterface_state_ << __E__;
825  PyObject* pName = PyString_FromString("do_stop_running");
826  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
827 
828  if(res == NULL)
829  {
830  PyErr_Print();
831  __SS__ << "Error calling stop transition" << __E__;
832  __SUP_SS_THROW__;
833  }
834  getDAQState_();
835  __SUP_COUT__ << "Status after stop: " << daqinterface_state_ << __E__;
836  __SUP_COUT__ << "Stopped." << __E__;
837 } // end transitionStopping()
838 
839 //========================================================================================================================
840 void ots::ARTDAQSupervisor::enteringError(toolbox::Event::Reference event)
841 {
842  __SUP_COUT__ << "Entering error recovery state" << __E__;
843  std::lock_guard<std::mutex> lk(daqinterface_mutex_);
844  getDAQState_();
845  __SUP_COUT__ << "Status before error: " << daqinterface_state_ << __E__;
846 
847  PyObject* pName = PyString_FromString("do_recover");
848  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
849 
850  if(res == NULL)
851  {
852  PyErr_Print();
853  __SS__ << "Error calling recover transition" << __E__;
854  __SUP_SS_THROW__;
855  }
856  getDAQState_();
857  __SUP_COUT__ << "Status after error: " << daqinterface_state_ << __E__;
858  __SUP_COUT__ << "EnteringError DONE." << __E__;
859 
860 } // end enteringError()
861 
862 //========================================================================================================================
863 void ots::ARTDAQSupervisor::getDAQState_()
864 {
865  //__SUP_COUT__ << "Getting DAQInterface state" << __E__;
866 
867  PyObject* pName = PyString_FromString("state");
868  PyObject* pArg = PyString_FromString("DAQInterface");
869  PyObject* res = PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, pArg, NULL);
870 
871  if(res == NULL)
872  {
873  PyErr_Print();
874  __SS__ << "Error calling state function" << __E__;
875  __SUP_SS_THROW__;
876  return;
877  }
878  daqinterface_state_ = std::string(PyString_AsString(res));
879  //__SUP_COUT__ << "getDAQState_ DONE: state=" << result << __E__;
880 } // end getDAQState_()
881 
882 //========================================================================================================================
883 void ots::ARTDAQSupervisor::daqinterfaceRunner_()
884 {
885  TLOG(TLVL_TRACE) << "Runner thread starting";
886  runner_running_ = true;
887  while(runner_running_)
888  {
889  if(daqinterface_ptr_ != NULL)
890  {
891  std::unique_lock<std::mutex> lk(daqinterface_mutex_);
892  getDAQState_();
893  std::string state_before = daqinterface_state_;
894 
895  if(daqinterface_state_ == "running" || daqinterface_state_ == "ready" ||
896  daqinterface_state_ == "booted")
897  {
898  try
899  {
900  TLOG(TLVL_TRACE) << "Calling DAQInterface::check_proc_heartbeats";
901  PyObject* pName = PyString_FromString("check_proc_heartbeats");
902  PyObject* res =
903  PyObject_CallMethodObjArgs(daqinterface_ptr_, pName, NULL);
904  TLOG(TLVL_TRACE)
905  << "Done with DAQInterface::check_proc_heartbeats call";
906 
907  if(res == NULL)
908  {
909  runner_running_ = false;
910  lk.unlock();
911  PyErr_Print();
912  __SS__ << "Error calling check_proc_heartbeats function" << __E__;
913  __SUP_SS_THROW__;
914  break;
915  }
916  }
917  catch(cet::exception& ex)
918  {
919  runner_running_ = false;
920  lk.unlock();
921  PyErr_Print();
922  __SS__ << "An cet::exception occurred while calling "
923  "check_proc_heartbeats function: "
924  << ex.explain_self() << __E__;
925  __SUP_SS_THROW__;
926  break;
927  }
928  catch(std::exception& ex)
929  {
930  runner_running_ = false;
931  lk.unlock();
932  PyErr_Print();
933  __SS__ << "An std::exception occurred while calling "
934  "check_proc_heartbeats function: "
935  << ex.what() << __E__;
936  __SUP_SS_THROW__;
937  break;
938  }
939  catch(...)
940  {
941  runner_running_ = false;
942  lk.unlock();
943  PyErr_Print();
944  __SS__ << "An unknown Error occurred while calling runner function"
945  << __E__;
946  __SUP_SS_THROW__;
947  break;
948  }
949 
950  getDAQState_();
951  if(daqinterface_state_ != state_before)
952  {
953  runner_running_ = false;
954  lk.unlock();
955  __SS__ << "DAQInterface state unexpectedly changed from "
956  << state_before << " to " << daqinterface_state_
957  << ". Check supervisor log file for more info!" << __E__;
958  __SUP_SS_THROW__;
959  break;
960  }
961  }
962  }
963  else
964  {
965  break;
966  }
967  usleep(1000000);
968  }
969  runner_running_ = false;
970  TLOG(TLVL_TRACE) << "Runner thread complete";
971 } // end daqinterfaceRunner_()
972 
973 //========================================================================================================================
974 void ots::ARTDAQSupervisor::stop_runner_()
975 {
976  runner_running_ = false;
977  if(runner_thread_ && runner_thread_->joinable())
978  {
979  runner_thread_->join();
980  runner_thread_.reset(nullptr);
981  }
982 } // end stop_runner_()
983 
984 //========================================================================================================================
985 void ots::ARTDAQSupervisor::start_runner_()
986 {
987  stop_runner_();
988  runner_thread_ =
989  std::make_unique<std::thread>(&ots::ARTDAQSupervisor::daqinterfaceRunner_, this);
990 } // end start_runner_()