otsdaq  v2_04_02
RunControlStateMachine.cc
1 #include "otsdaq/FiniteStateMachine/RunControlStateMachine.h"
2 #include "otsdaq/MessageFacility/MessageFacility.h"
3 
4 #include "otsdaq/Macros/CoutMacros.h"
5 #include "otsdaq/Macros/StringMacros.h"
6 
7 #include "otsdaq/SOAPUtilities/SOAPCommand.h"
8 #include "otsdaq/SOAPUtilities/SOAPUtilities.h"
9 
10 #include <toolbox/fsm/FailedEvent.h>
11 #include <xdaq/NamespaceURI.h>
12 #include <xoap/Method.h>
13 
14 #include <iostream>
15 
16 #undef __MF_SUBJECT__
17 #define __MF_SUBJECT__ std::string("FSM-") + theStateMachine_.getStateMachineName()
18 
19 using namespace ots;
20 
21 const std::string RunControlStateMachine::FAILED_STATE_NAME = "Failed";
22 const std::string RunControlStateMachine::HALTED_STATE_NAME = "Halted";
23 
24 //========================================================================================================================
25 RunControlStateMachine::RunControlStateMachine(const std::string& name)
26  : theStateMachine_(name)
27  , asyncFailureReceived_(false)
28  , asyncSoftFailureReceived_(false)
29 {
30  INIT_MF("RunControlStateMachine");
31 
32  theStateMachine_.addState(
33  'I', "Initial", this, &RunControlStateMachine::stateInitial);
34  theStateMachine_.addState('H', RunControlStateMachine::HALTED_STATE_NAME, this, &RunControlStateMachine::stateHalted);
35  theStateMachine_.addState(
36  'C', "Configured", this, &RunControlStateMachine::stateConfigured);
37  theStateMachine_.addState(
38  'R', "Running", this, &RunControlStateMachine::stateRunning);
39  theStateMachine_.addState('P', "Paused", this, &RunControlStateMachine::statePaused);
40  theStateMachine_.addState(
41  'X', "Shutdown", this, &RunControlStateMachine::stateShutdown);
42  // theStateMachine_.addState('v', "Recovering", this,
43  // &RunControlStateMachine::stateRecovering); theStateMachine_.addState('T',
44  // "TTSTestMode", this, &RunControlStateMachine::stateTTSTestMode);
45 
46  // RAR added back in on 11/20/2016.. why was it removed..
47  // exceptions like..
48  // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());)
49  // take state machine to "failed" otherwise
50  theStateMachine_.setStateName('F', RunControlStateMachine::FAILED_STATE_NAME); // x
51  theStateMachine_.setFailedStateTransitionAction(
52  this, &RunControlStateMachine::enteringError);
53  theStateMachine_.setFailedStateTransitionChanged(this,
54  &RunControlStateMachine::inError);
55 
56  // this line was added to get out of Failed state
57  RunControlStateMachine::addStateTransition(
58  'F', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
59  RunControlStateMachine::addStateTransition(
60  'F',
61  'X',
62  "Shutdown",
63  "Shutting Down",
64  this,
65  &RunControlStateMachine::transitionShuttingDown);
66 
67  RunControlStateMachine::addStateTransition(
68  'H',
69  'C',
70  "Configure",
71  "Configuring",
72  "ConfigurationAlias",
73  this,
74  &RunControlStateMachine::transitionConfiguring);
75  RunControlStateMachine::addStateTransition(
76  'H',
77  'X',
78  "Shutdown",
79  "Shutting Down",
80  this,
81  &RunControlStateMachine::transitionShuttingDown);
82  RunControlStateMachine::addStateTransition(
83  'X',
84  'I',
85  "Startup",
86  "Starting Up",
87  this,
88  &RunControlStateMachine::transitionStartingUp);
89 
90  // Every state can transition to halted
91  RunControlStateMachine::addStateTransition(
92  'I',
93  'H',
94  "Initialize",
95  "Initializing",
96  this,
97  &RunControlStateMachine::transitionInitializing);
98  RunControlStateMachine::addStateTransition(
99  'H', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
100  RunControlStateMachine::addStateTransition(
101  'C', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
102  RunControlStateMachine::addStateTransition(
103  'R', 'H', "Abort", "Aborting", this, &RunControlStateMachine::transitionHalting);
104  RunControlStateMachine::addStateTransition(
105  'P', 'H', "Abort", "Aborting", this, &RunControlStateMachine::transitionHalting);
106 
107  RunControlStateMachine::addStateTransition(
108  'R', 'P', "Pause", "Pausing", this, &RunControlStateMachine::transitionPausing);
109  RunControlStateMachine::addStateTransition(
110  'P',
111  'R',
112  "Resume",
113  "Resuming",
114  this,
115  &RunControlStateMachine::transitionResuming);
116  RunControlStateMachine::addStateTransition(
117  'C', 'R', "Start", "Starting", this, &RunControlStateMachine::transitionStarting);
118  RunControlStateMachine::addStateTransition(
119  'R', 'C', "Stop", "Stopping", this, &RunControlStateMachine::transitionStopping);
120  RunControlStateMachine::addStateTransition(
121  'P', 'C', "Stop", "Stopping", this, &RunControlStateMachine::transitionStopping);
122 
123  // NOTE!! There must be a defined message handler for each transition name created
124  // above
125  xoap::bind(this,
126  &RunControlStateMachine::runControlMessageHandler,
127  "Initialize",
128  XDAQ_NS_URI);
129  xoap::bind(this,
130  &RunControlStateMachine::runControlMessageHandler,
131  "Configure",
132  XDAQ_NS_URI);
133  xoap::bind(
134  this, &RunControlStateMachine::runControlMessageHandler, "Start", XDAQ_NS_URI);
135  xoap::bind(
136  this, &RunControlStateMachine::runControlMessageHandler, "Stop", XDAQ_NS_URI);
137  xoap::bind(
138  this, &RunControlStateMachine::runControlMessageHandler, "Pause", XDAQ_NS_URI);
139  xoap::bind(
140  this, &RunControlStateMachine::runControlMessageHandler, "Resume", XDAQ_NS_URI);
141  xoap::bind(
142  this, &RunControlStateMachine::runControlMessageHandler, "Halt", XDAQ_NS_URI);
143  xoap::bind(
144  this, &RunControlStateMachine::runControlMessageHandler, "Abort", XDAQ_NS_URI);
145  xoap::bind(
146  this, &RunControlStateMachine::runControlMessageHandler, "Shutdown", XDAQ_NS_URI);
147  xoap::bind(
148  this, &RunControlStateMachine::runControlMessageHandler, "Startup", XDAQ_NS_URI);
149  xoap::bind(
150  this, &RunControlStateMachine::runControlMessageHandler, "Fail", XDAQ_NS_URI);
151  xoap::bind(
152  this, &RunControlStateMachine::runControlMessageHandler, "Error", XDAQ_NS_URI);
153 
154  xoap::bind(this,
155  &RunControlStateMachine::runControlMessageHandler,
156  "AsyncError",
157  XDAQ_NS_URI);
158  xoap::bind(this,
159  &RunControlStateMachine::runControlMessageHandler,
160  "AsyncSoftError",
161  XDAQ_NS_URI);
162 
163  reset();
164 }
165 
166 //========================================================================================================================
167 RunControlStateMachine::~RunControlStateMachine(void) {}
168 
169 //========================================================================================================================
170 void RunControlStateMachine::reset(void)
171 {
172  __COUT__ << "Resetting RunControlStateMachine with name '"
173  << theStateMachine_.getStateMachineName() << "'..." << __E__;
174  theStateMachine_.setInitialState('I');
175  theStateMachine_.reset();
176 
177  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
178 
179  asyncFailureReceived_ = false;
180  asyncSoftFailureReceived_ = false;
181 }
182 
184 //(RunControlStateMachine::stateMachineFunction_t)
185 // RunControlStateMachine::getTransitionName( const toolbox::fsm::State from,
186 // const std::string& transition)
187 //{
188 // auto itFrom = stateTransitionFunctionTable_.find(from);
189 // if(itFrom == stateTransitionFunctionTable_.end())
190 // {
191 // __SS__ << "Cannot find transition function from '" << from <<
192 // "' with transition '" << transition << "!'" << __E__;
193 // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
194 // }
195 //
196 // auto itTrans = itFrom->second.find(transition);
197 // if(itTrans == itFrom->second.end())
198 // {
199 // __SS__ << "Cannot find transition function from '" << from <<
200 // "' with transition '" << transition << "!'" << __E__;
201 // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
202 // }
203 //
204 // return itTrans->second;
205 //}
206 
207 //========================================================================================================================
208 // runControlMessageHandler
209 // Handles the command broadcast message from the Gateway Supervisor
210 // and maps the command to a transition function, allowing for multiple iteration
211 // passes through the transition function.
212 xoap::MessageReference RunControlStateMachine::runControlMessageHandler(
213  xoap::MessageReference message)
214 
215 {
216  __COUT__ << "Received... \t" << SOAPUtilities::translate(message) << std::endl;
217 
218  std::string command = SOAPUtilities::translate(message).getCommand();
219 
220  // get iteration index
221  try
222  {
223  StringMacros::getNumber(
224  SOAPUtilities::translate(message).getParameters().getValue("iterationIndex"),
225  iterationIndex_);
226  }
227  catch(...) // ignore errors and set iteration index to 0
228  {
229  __COUT__ << "Defaulting iteration index to 0." << __E__;
230  iterationIndex_ = 0;
231  }
232  // get subIteration index
233  try
234  {
235  StringMacros::getNumber(
236  SOAPUtilities::translate(message).getParameters().getValue(
237  "subIterationIndex"),
238  subIterationIndex_);
239  }
240  catch(...) // ignore errors and set subIteration index to 0
241  {
242  __COUT__ << "Defaulting subIterationIndex_ index to 0." << __E__;
243  subIterationIndex_ = 0;
244  }
245 
246  // get retransmission indicator
247  try
248  {
249  if(SOAPUtilities::translate(message).getParameters().getValue("retransmission") ==
250  "1")
251  {
252  // handle retransmission
253 
254  // attempt to stop an error if last command was same
255  if(lastIterationCommand_ == command &&
256  lastIterationIndex_ == iterationIndex_ &&
257  lastSubIterationIndex_ == subIterationIndex_)
258  {
259  __COUT__
260  << "Assuming a timeout occurred at Gateway waiting for a response. "
261  << "Attempting to avoid error, by giving last result for command '"
262  << command << "': " << lastIterationResult_ << __E__;
263  return SOAPUtilities::makeSOAPMessageReference(lastIterationResult_);
264  }
265  else
266  __COUT__ << "Looks like Gateway command '" << command
267  << "' was lost - attempting to handle retransmission." << __E__;
268  }
269  }
270  catch(...) // ignore errors for retransmission indicator (assume it is not a
271  // retransmission)
272  {
273  ;
274  }
275  lastIterationIndex_ = iterationIndex_;
276  lastSubIterationIndex_ = subIterationIndex_;
277 
278  std::string currentState;
279  if(iterationIndex_ == 0 && subIterationIndex_ == 0)
280  {
281  // this is the first iteration attempt for this transition
282  theProgressBar_.reset(command, theStateMachine_.getStateMachineName());
283  currentState = theStateMachine_.getCurrentStateName();
284  __COUT__ << "Starting state for " << theStateMachine_.getStateMachineName()
285  << " is " << currentState << " and attempting to " << command
286  << std::endl;
287  }
288  else
289  {
290  currentState = theStateMachine_.getStateName(lastIterationState_);
291 
292  __COUT__ << "Iteration index " << iterationIndex_ << "." << subIterationIndex_
293  << " for " << theStateMachine_.getStateMachineName() << " from "
294  << currentState << " attempting to " << command << std::endl;
295  }
296 
297  RunControlStateMachine::theProgressBar_.step();
298 
299  std::string result = command + "Done";
300  lastIterationResult_ = result;
301 
302  // if error is received, immediately go to fail state
303  // likely error was sent by central FSM or external xoap
304  if(command == "Error" || command == "Fail")
305  {
306  __SS__ << command << " was received! Halting immediately." << std::endl;
307  __COUT_ERR__ << "\n" << ss.str();
308 
309  try
310  {
311  if(currentState == "Configured")
312  theStateMachine_.execTransition("Halt", message);
313  else if(currentState == "Running" || currentState == "Paused")
314  theStateMachine_.execTransition("Abort", message);
315  }
316  catch(...)
317  {
318  __COUT_ERR__ << "Halting failed in reaction to " << command << "... ignoring."
319  << __E__;
320  }
321  return SOAPUtilities::makeSOAPMessageReference(result);
322  }
323  else if(command == "AsyncError")
324  {
325  std::string errorMessage =
326  SOAPUtilities::translate(message).getParameters().getValue("ErrorMessage");
327 
328  __SS__ << command << " was received! Error'ing immediately: " << errorMessage
329  << std::endl;
330  __COUT_ERR__ << "\n" << ss.str();
331  theStateMachine_.setErrorMessage(ss.str());
332 
333  asyncFailureReceived_ = true; // mark flag, to be used to abort next transition
334  // determine any valid transition from where we are
335  theStateMachine_.execTransition("fail");
336  // XCEPT_RAISE (toolbox::fsm::exception::Exception, ss.str());
337 
338  return SOAPUtilities::makeSOAPMessageReference(result);
339  }
340  else if(command == "AsyncSoftError")
341  {
342  std::string errorMessage =
343  SOAPUtilities::translate(message).getParameters().getValue("ErrorMessage");
344 
345  __SS__ << command << " was received! Pause'ing immediately: " << errorMessage
346  << std::endl;
347  __COUT_ERR__ << "\n" << ss.str();
348  theStateMachine_.setErrorMessage(ss.str());
349 
350  if(!asyncSoftFailureReceived_) // launch pause only first time
351  {
352  asyncSoftFailureReceived_ = true; // mark flag, to be used to avoid double
353  // pausing and identify pause was due to
354  // soft error
355  theStateMachine_.execTransition("Pause");
356  }
357 
358  return SOAPUtilities::makeSOAPMessageReference(result);
359  }
360 
361  // if already Halted, respond to Initialize with "done"
362  // (this avoids race conditions involved with artdaq mpi reset)
363  if(command == "Initialize" && currentState == RunControlStateMachine::HALTED_STATE_NAME)
364  {
365  __COUT__ << "Already Initialized.. ignoring Initialize command." << std::endl;
366 
367  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
368  return SOAPUtilities::makeSOAPMessageReference(result);
369  }
370 
371  __COUTV__(command);
372  __COUTV__(currentState);
373 
374  if(command == "Halt" && currentState == "Initial")
375  {
376  __COUT__ << "Converting Halt command to Initialize, since currently in "
377  "Initialized state."
378  << std::endl;
379  command = "Initialize";
380  message = SOAPUtilities::makeSOAPMessageReference(command);
381  }
382 
383  // handle normal transitions here
384  try
385  {
386  theStateMachine_.setErrorMessage("", false /*append*/); // clear error message
387 
388  iterationWorkFlag_ = false;
389  subIterationWorkFlag_ = false;
390  if(iterationIndex_ || subIterationIndex_)
391  {
392  __COUT__ << command << " iteration " << iterationIndex_ << "."
393  << subIterationIndex_ << __E__;
394  toolbox::Event::Reference event(new toolbox::Event(command, this));
395 
396  // call inheriting transition function based on last state and command
397  {
398  // e.g. transitionConfiguring(event);
399  __COUT__ << "Iterating on the transition function from " << currentState
400  << " through " << lastIterationCommand_ << __E__;
401 
402  auto itFrom = stateTransitionFunctionTable_.find(lastIterationState_);
403  if(itFrom == stateTransitionFunctionTable_.end())
404  {
405  __SS__ << "Cannot find transition function from '" << currentState
406  << "' with transition '" << lastIterationCommand_ << "!'"
407  << __E__;
408  __COUT_ERR__ << ss.str();
409  XCEPT_RAISE(toolbox::fsm::exception::Exception, ss.str());
410  }
411 
412  auto itTransition = itFrom->second.find(lastIterationCommand_);
413  if(itTransition == itFrom->second.end())
414  {
415  __SS__ << "Cannot find transition function from '" << currentState
416  << "' with transition '" << lastIterationCommand_ << "!'"
417  << __E__;
418  __COUT_ERR__ << ss.str();
419  XCEPT_RAISE(toolbox::fsm::exception::Exception, ss.str());
420  }
421 
422  (this->*(itTransition->second))(event); // call the transition function
423  }
424  }
425  else
426  {
427  // save the lookup parameters for the last function to be called for the case
428  // of additional iterations
429  lastIterationState_ = theStateMachine_.getCurrentState();
430  lastIterationCommand_ = command;
431 
432  theStateMachine_.execTransition(command, message);
433  }
434 
435  if(subIterationWorkFlag_) // sub-iteration has priority over 'Working'
436  {
437  __COUTV__(subIterationWorkFlag_);
438  result =
439  command + "SubIterate"; // indicate another sub-iteration back to Gateway
440  }
441  else if(iterationWorkFlag_)
442  {
443  __COUTV__(iterationWorkFlag_);
444  result = command + "Iterate"; // indicate another iteration back to Gateway
445  }
446  }
447  catch(toolbox::fsm::exception::Exception& e)
448  {
449  __SS__ << "Run Control Message Handling Failed: " << e.what() << " "
450  << theStateMachine_.getErrorMessage() << __E__;
451  __COUT_ERR__ << ss.str();
452  theStateMachine_.setErrorMessage(ss.str());
453 
454  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " +
455  theStateMachine_.getErrorMessage();
456  }
457  catch(...)
458  {
459  __SS__ << "Run Control Message Handling encountered an unknown error."
460  << theStateMachine_.getErrorMessage() << __E__;
461  __COUT_ERR__ << ss.str();
462  theStateMachine_.setErrorMessage(ss.str());
463 
464  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " +
465  theStateMachine_.getErrorMessage();
466  }
467 
468  RunControlStateMachine::theProgressBar_.step();
469 
470  currentState = theStateMachine_.getCurrentStateName();
471 
472  if(currentState == RunControlStateMachine::FAILED_STATE_NAME)
473  {
474  result = command + " " + RunControlStateMachine::FAILED_STATE_NAME + ": " +
475  theStateMachine_.getErrorMessage();
476  __COUT_ERR__ << "Unexpected Failure state for "
477  << theStateMachine_.getStateMachineName() << " is " << currentState
478  << std::endl;
479  __COUT_ERR__ << "Error message was as follows: "
480  << theStateMachine_.getErrorMessage() << std::endl;
481  }
482 
483  RunControlStateMachine::theProgressBar_.step();
484 
485  if(!iterationWorkFlag_ && !subIterationWorkFlag_)
486  theProgressBar_.complete();
487  else
488  {
489  __COUTV__(theProgressBar_.read());
490  __COUTV__(theProgressBar_.isComplete());
491  }
492 
493  __COUT__ << "Ending state for " << theStateMachine_.getStateMachineName() << " is "
494  << currentState << std::endl;
495  __COUT__ << "result = " << result << std::endl;
496  lastIterationResult_ = result;
497  return SOAPUtilities::makeSOAPMessageReference(result);
498 }