otsdaq  v2_04_01
RunControlStateMachine.cc
1 #include "otsdaq-core/FiniteStateMachine/RunControlStateMachine.h"
2 #include "otsdaq-core/MessageFacility/MessageFacility.h"
3 
4 #include "otsdaq-core/Macros/CoutMacros.h"
5 #include "otsdaq-core/Macros/StringMacros.h"
6 
7 #include "otsdaq-core/SOAPUtilities/SOAPCommand.h"
8 #include "otsdaq-core/SOAPUtilities/SOAPUtilities.h"
9 
10 #include <toolbox/fsm/FailedEvent.h>
11 #include <xdaq/NamespaceURI.h>
12 #include <xoap/Method.h>
13 
14 #include <iostream>
15 
16 #undef __MF_SUBJECT__
17 #define __MF_SUBJECT__ std::string("FSM-") + theStateMachine_.getStateMachineName()
18 
19 using namespace ots;
20 
21 const std::string RunControlStateMachine::FAILED_STATE_NAME = "Failed";
22 
23 //========================================================================================================================
24 RunControlStateMachine::RunControlStateMachine(const std::string& name)
25  : theStateMachine_(name)
26  , asyncFailureReceived_(false)
27  , asyncSoftFailureReceived_(false)
28 {
29  INIT_MF("RunControlStateMachine");
30 
31  theStateMachine_.addState(
32  'I', "Initial", this, &RunControlStateMachine::stateInitial);
33  theStateMachine_.addState('H', "Halted", this, &RunControlStateMachine::stateHalted);
34  theStateMachine_.addState(
35  'C', "Configured", this, &RunControlStateMachine::stateConfigured);
36  theStateMachine_.addState(
37  'R', "Running", this, &RunControlStateMachine::stateRunning);
38  theStateMachine_.addState('P', "Paused", this, &RunControlStateMachine::statePaused);
39  theStateMachine_.addState(
40  'X', "Shutdown", this, &RunControlStateMachine::stateShutdown);
41  // theStateMachine_.addState('v', "Recovering", this,
42  // &RunControlStateMachine::stateRecovering); theStateMachine_.addState('T',
43  // "TTSTestMode", this, &RunControlStateMachine::stateTTSTestMode);
44 
45  // RAR added back in on 11/20/2016.. why was it removed..
46  // exceptions like..
47  // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());)
48  // take state machine to "failed" otherwise
49  theStateMachine_.setStateName('F', RunControlStateMachine::FAILED_STATE_NAME); // x
50  theStateMachine_.setFailedStateTransitionAction(
51  this, &RunControlStateMachine::enteringError);
52  theStateMachine_.setFailedStateTransitionChanged(this,
53  &RunControlStateMachine::inError);
54 
55  // this line was added to get out of Failed state
56  RunControlStateMachine::addStateTransition(
57  'F', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
58  RunControlStateMachine::addStateTransition(
59  'F',
60  'X',
61  "Shutdown",
62  "Shutting Down",
63  this,
64  &RunControlStateMachine::transitionShuttingDown);
65 
66  RunControlStateMachine::addStateTransition(
67  'H',
68  'C',
69  "Configure",
70  "Configuring",
71  "ConfigurationAlias",
72  this,
73  &RunControlStateMachine::transitionConfiguring);
74  RunControlStateMachine::addStateTransition(
75  'H',
76  'X',
77  "Shutdown",
78  "Shutting Down",
79  this,
80  &RunControlStateMachine::transitionShuttingDown);
81  RunControlStateMachine::addStateTransition(
82  'X',
83  'I',
84  "Startup",
85  "Starting Up",
86  this,
87  &RunControlStateMachine::transitionStartingUp);
88 
89  // Every state can transition to halted
90  RunControlStateMachine::addStateTransition(
91  'I',
92  'H',
93  "Initialize",
94  "Initializing",
95  this,
96  &RunControlStateMachine::transitionInitializing);
97  RunControlStateMachine::addStateTransition(
98  'H', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
99  RunControlStateMachine::addStateTransition(
100  'C', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
101  RunControlStateMachine::addStateTransition(
102  'R', 'H', "Abort", "Aborting", this, &RunControlStateMachine::transitionHalting);
103  RunControlStateMachine::addStateTransition(
104  'P', 'H', "Abort", "Aborting", this, &RunControlStateMachine::transitionHalting);
105 
106  RunControlStateMachine::addStateTransition(
107  'R', 'P', "Pause", "Pausing", this, &RunControlStateMachine::transitionPausing);
108  RunControlStateMachine::addStateTransition(
109  'P',
110  'R',
111  "Resume",
112  "Resuming",
113  this,
114  &RunControlStateMachine::transitionResuming);
115  RunControlStateMachine::addStateTransition(
116  'C', 'R', "Start", "Starting", this, &RunControlStateMachine::transitionStarting);
117  RunControlStateMachine::addStateTransition(
118  'R', 'C', "Stop", "Stopping", this, &RunControlStateMachine::transitionStopping);
119  RunControlStateMachine::addStateTransition(
120  'P', 'C', "Stop", "Stopping", this, &RunControlStateMachine::transitionStopping);
121 
122  // NOTE!! There must be a defined message handler for each transition name created
123  // above
124  xoap::bind(this,
125  &RunControlStateMachine::runControlMessageHandler,
126  "Initialize",
127  XDAQ_NS_URI);
128  xoap::bind(this,
129  &RunControlStateMachine::runControlMessageHandler,
130  "Configure",
131  XDAQ_NS_URI);
132  xoap::bind(
133  this, &RunControlStateMachine::runControlMessageHandler, "Start", XDAQ_NS_URI);
134  xoap::bind(
135  this, &RunControlStateMachine::runControlMessageHandler, "Stop", XDAQ_NS_URI);
136  xoap::bind(
137  this, &RunControlStateMachine::runControlMessageHandler, "Pause", XDAQ_NS_URI);
138  xoap::bind(
139  this, &RunControlStateMachine::runControlMessageHandler, "Resume", XDAQ_NS_URI);
140  xoap::bind(
141  this, &RunControlStateMachine::runControlMessageHandler, "Halt", XDAQ_NS_URI);
142  xoap::bind(
143  this, &RunControlStateMachine::runControlMessageHandler, "Abort", XDAQ_NS_URI);
144  xoap::bind(
145  this, &RunControlStateMachine::runControlMessageHandler, "Shutdown", XDAQ_NS_URI);
146  xoap::bind(
147  this, &RunControlStateMachine::runControlMessageHandler, "Startup", XDAQ_NS_URI);
148  xoap::bind(
149  this, &RunControlStateMachine::runControlMessageHandler, "Fail", XDAQ_NS_URI);
150  xoap::bind(
151  this, &RunControlStateMachine::runControlMessageHandler, "Error", XDAQ_NS_URI);
152 
153  xoap::bind(this,
154  &RunControlStateMachine::runControlMessageHandler,
155  "AsyncError",
156  XDAQ_NS_URI);
157  xoap::bind(this,
158  &RunControlStateMachine::runControlMessageHandler,
159  "AsyncSoftError",
160  XDAQ_NS_URI);
161 
162  reset();
163 }
164 
165 //========================================================================================================================
166 RunControlStateMachine::~RunControlStateMachine(void) {}
167 
168 //========================================================================================================================
169 void RunControlStateMachine::reset(void)
170 {
171  __COUT__ << "Resetting RunControlStateMachine with name '"
172  << theStateMachine_.getStateMachineName() << "'..." << __E__;
173  theStateMachine_.setInitialState('I');
174  theStateMachine_.reset();
175 
176  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
177 
178  asyncFailureReceived_ = false;
179  asyncSoftFailureReceived_ = false;
180 }
181 
183 //(RunControlStateMachine::stateMachineFunction_t)
184 // RunControlStateMachine::getTransitionName( const toolbox::fsm::State from,
185 // const std::string& transition)
186 //{
187 // auto itFrom = stateTransitionFunctionTable_.find(from);
188 // if(itFrom == stateTransitionFunctionTable_.end())
189 // {
190 // __SS__ << "Cannot find transition function from '" << from <<
191 // "' with transition '" << transition << "!'" << __E__;
192 // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
193 // }
194 //
195 // auto itTrans = itFrom->second.find(transition);
196 // if(itTrans == itFrom->second.end())
197 // {
198 // __SS__ << "Cannot find transition function from '" << from <<
199 // "' with transition '" << transition << "!'" << __E__;
200 // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
201 // }
202 //
203 // return itTrans->second;
204 //}
205 
206 //========================================================================================================================
207 // runControlMessageHandler
208 // Handles the command broadcast message from the Gateway Supervisor
209 // and maps the command to a transition function, allowing for multiple iteration
210 // passes through the transition function.
211 xoap::MessageReference RunControlStateMachine::runControlMessageHandler(
212  xoap::MessageReference message)
213 
214 {
215  __COUT__ << "Received... \t" << SOAPUtilities::translate(message) << std::endl;
216 
217  std::string command = SOAPUtilities::translate(message).getCommand();
218 
219  // get iteration index
220  try
221  {
222  StringMacros::getNumber(
223  SOAPUtilities::translate(message).getParameters().getValue("iterationIndex"),
224  iterationIndex_);
225  }
226  catch(...) // ignore errors and set iteration index to 0
227  {
228  __COUT__ << "Defaulting iteration index to 0." << __E__;
229  iterationIndex_ = 0;
230  }
231  // get subIteration index
232  try
233  {
234  StringMacros::getNumber(
235  SOAPUtilities::translate(message).getParameters().getValue(
236  "subIterationIndex"),
237  subIterationIndex_);
238  }
239  catch(...) // ignore errors and set subIteration index to 0
240  {
241  __COUT__ << "Defaulting subIterationIndex_ index to 0." << __E__;
242  subIterationIndex_ = 0;
243  }
244 
245  // get retransmission indicator
246  try
247  {
248  if(SOAPUtilities::translate(message).getParameters().getValue("retransmission") ==
249  "1")
250  {
251  // handle retransmission
252 
253  // attempt to stop an error if last command was same
254  if(lastIterationCommand_ == command &&
255  lastIterationIndex_ == iterationIndex_ &&
256  lastSubIterationIndex_ == subIterationIndex_)
257  {
258  __COUT__
259  << "Assuming a timeout occurred at Gateway waiting for a response. "
260  << "Attempting to avoid error, by giving last result for command '"
261  << command << "': " << lastIterationResult_ << __E__;
262  return SOAPUtilities::makeSOAPMessageReference(lastIterationResult_);
263  }
264  else
265  __COUT__ << "Looks like Gateway command '" << command
266  << "' was lost - attempting to handle retransmission." << __E__;
267  }
268  }
269  catch(...) // ignore errors for retransmission indicator (assume it is not a
270  // retransmission)
271  {
272  ;
273  }
274  lastIterationIndex_ = iterationIndex_;
275  lastSubIterationIndex_ = subIterationIndex_;
276 
277  std::string currentState;
278  if(iterationIndex_ == 0 && subIterationIndex_ == 0)
279  {
280  // this is the first iteration attempt for this transition
281  theProgressBar_.reset(command, theStateMachine_.getStateMachineName());
282  currentState = theStateMachine_.getCurrentStateName();
283  __COUT__ << "Starting state for " << theStateMachine_.getStateMachineName()
284  << " is " << currentState << " and attempting to " << command
285  << std::endl;
286  }
287  else
288  {
289  currentState = theStateMachine_.getStateName(lastIterationState_);
290 
291  __COUT__ << "Iteration index " << iterationIndex_ << "." << subIterationIndex_
292  << " for " << theStateMachine_.getStateMachineName() << " from "
293  << currentState << " attempting to " << command << std::endl;
294  }
295 
296  RunControlStateMachine::theProgressBar_.step();
297 
298  std::string result = command + "Done";
299  lastIterationResult_ = result;
300 
301  // if error is received, immediately go to fail state
302  // likely error was sent by central FSM or external xoap
303  if(command == "Error" || command == "Fail")
304  {
305  __SS__ << command << " was received! Halting immediately." << std::endl;
306  __COUT_ERR__ << "\n" << ss.str();
307 
308  try
309  {
310  if(currentState == "Configured")
311  theStateMachine_.execTransition("Halt", message);
312  else if(currentState == "Running" || currentState == "Paused")
313  theStateMachine_.execTransition("Abort", message);
314  }
315  catch(...)
316  {
317  __COUT_ERR__ << "Halting failed in reaction to " << command << "... ignoring."
318  << __E__;
319  }
320  return SOAPUtilities::makeSOAPMessageReference(result);
321  }
322  else if(command == "AsyncError")
323  {
324  std::string errorMessage =
325  SOAPUtilities::translate(message).getParameters().getValue("ErrorMessage");
326 
327  __SS__ << command << " was received! Error'ing immediately: " << errorMessage
328  << std::endl;
329  __COUT_ERR__ << "\n" << ss.str();
330  theStateMachine_.setErrorMessage(ss.str());
331 
332  asyncFailureReceived_ = true; // mark flag, to be used to abort next transition
333  // determine any valid transition from where we are
334  theStateMachine_.execTransition("fail");
335  // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
336 
337  return SOAPUtilities::makeSOAPMessageReference(result);
338  }
339  else if(command == "AsyncSoftError")
340  {
341  std::string errorMessage =
342  SOAPUtilities::translate(message).getParameters().getValue("ErrorMessage");
343 
344  __SS__ << command << " was received! Pause'ing immediately: " << errorMessage
345  << std::endl;
346  __COUT_ERR__ << "\n" << ss.str();
347  theStateMachine_.setErrorMessage(ss.str());
348 
349  if(!asyncSoftFailureReceived_) // launch pause only first time
350  {
351  asyncSoftFailureReceived_ = true; // mark flag, to be used to avoid double
352  // pausing and identify pause was due to
353  // soft error
354  theStateMachine_.execTransition("Pause");
355  }
356 
357  return SOAPUtilities::makeSOAPMessageReference(result);
358  }
359 
360  // if already Halted, respond to Initialize with "done"
361  // (this avoids race conditions involved with artdaq mpi reset)
362  if(command == "Initialize" && currentState == "Halted")
363  {
364  __COUT__ << "Already Initialized.. ignoring Initialize command." << std::endl;
365 
366  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
367  return SOAPUtilities::makeSOAPMessageReference(result);
368  }
369 
370  // handle normal transitions here
371  try
372  {
373  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
374 
375  iterationWorkFlag_ = false;
376  subIterationWorkFlag_ = false;
377  if(iterationIndex_ || subIterationIndex_)
378  {
379  __COUT__ << command << " iteration " << iterationIndex_ << "."
380  << subIterationIndex_ << __E__;
381  toolbox::Event::Reference event(new toolbox::Event(command, this));
382 
383  // call inheriting transition function based on last state and command
384  {
385  // e.g. transitionConfiguring(event);
386  __COUT__ << "Iterating on the transition function from " << currentState
387  << " through " << lastIterationCommand_ << __E__;
388 
389  auto itFrom = stateTransitionFunctionTable_.find(lastIterationState_);
390  if(itFrom == stateTransitionFunctionTable_.end())
391  {
392  __SS__ << "Cannot find transition function from '" << currentState
393  << "' with transition '" << lastIterationCommand_ << "!'"
394  << __E__;
395  __COUT_ERR__ << ss.str();
396  XCEPT_RAISE(toolbox::fsm::exception::Exception, ss.str());
397  }
398 
399  auto itTransition = itFrom->second.find(lastIterationCommand_);
400  if(itTransition == itFrom->second.end())
401  {
402  __SS__ << "Cannot find transition function from '" << currentState
403  << "' with transition '" << lastIterationCommand_ << "!'"
404  << __E__;
405  __COUT_ERR__ << ss.str();
406  XCEPT_RAISE(toolbox::fsm::exception::Exception, ss.str());
407  }
408 
409  (this->*(itTransition->second))(event); // call the transition function
410  }
411  }
412  else
413  {
414  // save the lookup parameters for the last function to be called for the case
415  // of additional iterations
416  lastIterationState_ = theStateMachine_.getCurrentState();
417  lastIterationCommand_ = command;
418 
419  theStateMachine_.execTransition(command, message);
420  }
421 
422  if(subIterationWorkFlag_) // sub-iteration has priority over 'Working'
423  {
424  __COUTV__(subIterationWorkFlag_);
425  result =
426  command + "SubIterate"; // indicate another sub-iteration back to Gateway
427  }
428  else if(iterationWorkFlag_)
429  {
430  __COUTV__(iterationWorkFlag_);
431  result = command + "Iterate"; // indicate another iteration back to Gateway
432  }
433  }
434  catch(toolbox::fsm::exception::Exception& e)
435  {
436  __SS__ << "Run Control Message Handling Failed: " << e.what() << " "
437  << theStateMachine_.getErrorMessage() << __E__;
438  __COUT_ERR__ << ss.str();
439  theStateMachine_.setErrorMessage(ss.str());
440 
441  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " +
442  theStateMachine_.getErrorMessage();
443  }
444  catch(...)
445  {
446  __SS__ << "Run Control Message Handling encountered an unknown error."
447  << theStateMachine_.getErrorMessage() << __E__;
448  __COUT_ERR__ << ss.str();
449  theStateMachine_.setErrorMessage(ss.str());
450 
451  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " +
452  theStateMachine_.getErrorMessage();
453  }
454 
455  RunControlStateMachine::theProgressBar_.step();
456 
457  currentState = theStateMachine_.getCurrentStateName();
458 
459  if(currentState == RunControlStateMachine::FAILED_STATE_NAME)
460  {
461  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " +
462  theStateMachine_.getErrorMessage();
463  __COUT_ERR__ << "Unexpected Failure state for "
464  << theStateMachine_.getStateMachineName() << " is " << currentState
465  << std::endl;
466  __COUT_ERR__ << "Error message was as follows: "
467  << theStateMachine_.getErrorMessage() << std::endl;
468  }
469 
470  RunControlStateMachine::theProgressBar_.step();
471 
472  if(!iterationWorkFlag_)
473  theProgressBar_.complete();
474 
475  __COUT__ << "Ending state for " << theStateMachine_.getStateMachineName() << " is "
476  << currentState << std::endl;
477  __COUT__ << "result = " << result << std::endl;
478  lastIterationResult_ = result;
479  return SOAPUtilities::makeSOAPMessageReference(result);
480 }