artdaq  v2_03_03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
CommandableFragmentGenerator.cc
1 #include "artdaq/Application/CommandableFragmentGenerator.hh"
2 
3 #include <boost/exception/all.hpp>
4 #include <boost/throw_exception.hpp>
5 
6 #include <limits>
7 #include <iterator>
8 
9 #include "canvas/Utilities/Exception.h"
10 #include "cetlib_except/exception.h"
11 #include "fhiclcpp/ParameterSet.h"
12 #include "artdaq-core/Utilities/SimpleLookupPolicy.hh"
13 #include "artdaq-core/Data/Fragment.hh"
14 #include "artdaq-core/Data/ContainerFragmentLoader.hh"
15 #include "artdaq-core/Utilities/ExceptionHandler.hh"
16 #include "artdaq-core/Utilities/TimeUtils.hh"
17 
18 #include <fstream>
19 #include <iomanip>
20 #include <iterator>
21 #include <iostream>
22 #include <iomanip>
23 #include <sys/poll.h>
25 
27  : mutex_()
28  , request_port_(3001)
29  , request_addr_("227.128.12.26")
30  , requests_()
31  , request_stop_requested_(false)
32  , request_received_(false)
33  , end_of_run_timeout_ms_(1000)
34  , windowOffset_(0)
35  , windowWidth_(0)
36  , staleTimeout_(Fragment::InvalidTimestamp)
37  , expectedType_(Fragment::EmptyFragmentType)
38  , maxFragmentCount_(std::numeric_limits<size_t>::max())
39  , uniqueWindows_(true)
40  , last_window_send_time_()
41  , missing_request_window_timeout_us_(1000000)
42  , window_close_timeout_us_(2000000)
43  , useDataThread_(false)
44  , sleep_on_no_data_us_(0)
45  , data_thread_running_(false)
46  , dataBufferDepthFragments_(0)
47  , dataBufferDepthBytes_(0)
48  , maxDataBufferDepthFragments_(1000)
49  , maxDataBufferDepthBytes_(1000)
50  , useMonitoringThread_(false)
51  , monitoringInterval_(0)
52  , lastMonitoringCall_()
53  , isHardwareOK_(true)
54  , dataBuffer_()
55  , newDataBuffer_()
56  , run_number_(-1)
57  , subrun_number_(-1)
58  , timeout_(std::numeric_limits<uint64_t>::max())
59  , timestamp_(std::numeric_limits<uint64_t>::max())
60  , should_stop_(false)
61  , exception_(false)
62  , force_stop_(false)
63  , latest_exception_report_("none")
64  , ev_counter_(1)
65  , board_id_(-1)
66  , instance_name_for_metrics_("FragmentGenerator")
67  , sleep_on_stop_us_(0)
68 {}
69 
71  : mutex_()
72  , request_port_(ps.get<int>("request_port", 3001))
73  , request_addr_(ps.get<std::string>("request_address", "227.128.12.26"))
74  , requests_()
75  , request_stop_requested_(false)
76  , request_received_(false)
77  , end_of_run_timeout_ms_(ps.get<size_t>("end_of_run_quiet_timeout_ms", 1000))
78  , windowOffset_(ps.get<Fragment::timestamp_t>("request_window_offset", 0))
79  , windowWidth_(ps.get<Fragment::timestamp_t>("request_window_width", 0))
80  , staleTimeout_(ps.get<Fragment::timestamp_t>("stale_request_timeout", 0xFFFFFFFF))
81  , expectedType_(ps.get<Fragment::type_t>("expected_fragment_type", Fragment::type_t(Fragment::EmptyFragmentType)))
82  , uniqueWindows_(ps.get<bool>("request_windows_are_unique", true))
83  , last_window_send_time_(std::chrono::steady_clock::now())
84  , missing_request_window_timeout_us_(ps.get<size_t>("missing_request_window_timeout_us", 1000000))
85  , window_close_timeout_us_(ps.get<size_t>("window_close_timeout_us", 2000000))
86  , useDataThread_(ps.get<bool>("separate_data_thread", false))
87  , sleep_on_no_data_us_(ps.get<size_t>("sleep_on_no_data_us", 0))
88  , data_thread_running_(false)
89  , dataBufferDepthFragments_(0)
90  , dataBufferDepthBytes_(0)
91  , maxDataBufferDepthFragments_(ps.get<int>("data_buffer_depth_fragments", 1000))
92  , maxDataBufferDepthBytes_(ps.get<size_t>("data_buffer_depth_mb", 1000) * 1024 * 1024)
93  , useMonitoringThread_(ps.get<bool>("separate_monitoring_thread", false))
94  , monitoringInterval_(ps.get<int64_t>("hardware_poll_interval_us", 0))
95  , lastMonitoringCall_()
96  , isHardwareOK_(true)
97  , dataBuffer_()
98  , newDataBuffer_()
99  , run_number_(-1)
100  , subrun_number_(-1)
101  , timeout_(std::numeric_limits<uint64_t>::max())
102  , timestamp_(std::numeric_limits<uint64_t>::max())
103  , should_stop_(false)
104  , exception_(false)
105  , force_stop_(false)
106  , latest_exception_report_("none")
107  , ev_counter_(1)
108  , board_id_(-1)
109  , sleep_on_stop_us_(0)
110 {
 // Body of the ParameterSet constructor: reads the remaining configuration
 // keys, seeds dataBuffer_ with one empty Fragment, and selects the request mode.
 // "board_id" is read without a default value.
111  board_id_ = ps.get<int>("board_id");
112  instance_name_for_metrics_ = "BoardReader." + boost::lexical_cast<std::string>(board_id_);
113 
114  fragment_ids_ = ps.get<std::vector<artdaq::Fragment::fragment_id_t>>("fragment_ids", std::vector<artdaq::Fragment::fragment_id_t>());
115 
116  TLOG_TRACE("CommandableFragmentGenerator") << "artdaq::CommandableFragmentGenerator::CommandableFragmentGenerator(ps)" << TLOG_ENDL;
 // -99 is the sentinel meaning "fragment_id was not configured".
117  int fragment_id = ps.get<int>("fragment_id", -99);
118 
119  if (fragment_id != -99)
120  {
 // "fragment_id" and "fragment_ids" are mutually exclusive.
121  if (fragment_ids_.size() != 0)
122  {
123  latest_exception_report_ = "Error in CommandableFragmentGenerator: can't both define \"fragment_id\" and \"fragment_ids\" in FHiCL document";
124  throw cet::exception(latest_exception_report_);
125  }
126  else
127  {
128  fragment_ids_.emplace_back(fragment_id);
129  }
130  }
131 
132  sleep_on_stop_us_ = ps.get<int>("sleep_on_stop_us", 0);
133 
 // Seed the data buffer with a single Fragment whose system type is EmptyFragmentType.
134  dataBuffer_.emplace_back(FragmentPtr(new Fragment()));
135  (*dataBuffer_.begin())->setSystemType(Fragment::EmptyFragmentType);
136 
 // "single"/"window" must match exactly (either capitalization shown);
 // "buffer" and "ignore" match as substrings.
 // NOTE(review): if modeString matches none of the branches, mode_ is not
 // assigned here and it is not in the visible initializer list -- confirm it
 // has an in-class default, otherwise it is read uninitialized below.
137  std::string modeString = ps.get<std::string>("request_mode", "ignored");
138  if (modeString == "single" || modeString == "Single")
139  {
140  mode_ = RequestMode::Single;
141  }
142  else if (modeString.find("buffer") != std::string::npos || modeString.find("Buffer") != std::string::npos)
143  {
144  mode_ = RequestMode::Buffer;
145  }
146  else if (modeString == "window" || modeString == "Window")
147  {
148  mode_ = RequestMode::Window;
149  }
150  else if (modeString.find("ignore") != std::string::npos || modeString.find("Ignore") != std::string::npos)
151  {
152  mode_ = RequestMode::Ignored;
153  }
154  TLOG_DEBUG("CommandableFragmentGenerator") << "Request mode is " << printMode_() << TLOG_ENDL;
155 
 // Every request-driven mode requires the separate data thread.
156  if (mode_ != RequestMode::Ignored)
157  {
158  if (!useDataThread_)
159  {
 // NOTE(review): the message names "use_data_thread", but useDataThread_ is
 // read from the key "separate_data_thread" in the initializer list.
160  latest_exception_report_ = "Error in CommandableFragmentGenerator: use_data_thread must be true when request_mode is not \"Ignored\"!";
161  throw cet::exception(latest_exception_report_);
162  }
 // NOTE(review): embedded listing line 163 is absent from this extract; a
 // statement may be missing here -- check the original file.
164  }
165 }
166 
168 {
169  request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
170  if (!request_socket_)
171  {
172  throw art::Exception(art::errors::Configuration) << "CommandableFragmentGenerator: Error creating socket for receiving data requests!" << std::endl;
173  exit(1);
174  }
175 
176  struct sockaddr_in si_me_request;
177 
178  int yes = 1;
179  if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
180  {
181  throw art::Exception(art::errors::Configuration) <<
182  "RequestedFragmentGenrator: Unable to enable port reuse on request socket" << std::endl;
183  exit(1);
184  }
185  memset(&si_me_request, 0, sizeof(si_me_request));
186  si_me_request.sin_family = AF_INET;
187  si_me_request.sin_port = htons(request_port_);
188  si_me_request.sin_addr.s_addr = htonl(INADDR_ANY);
189  if (bind(request_socket_, (struct sockaddr *)&si_me_request, sizeof(si_me_request)) == -1)
190  {
191  throw art::Exception(art::errors::Configuration) <<
192  "CommandableFragmentGenerator: Cannot bind request socket to port " << request_port_ << std::endl;
193  exit(1);
194  }
195 
196  if (request_addr_ != "localhost")
197  {
198  struct ip_mreq mreq;
199  int sts = ResolveHost(request_addr_.c_str(), mreq.imr_multiaddr);
200  if (sts == -1)
201  {
202  throw art::Exception(art::errors::Configuration) << "Unable to resolve multicast request address" << std::endl;
203  exit(1);
204  }
205  mreq.imr_interface.s_addr = htonl(INADDR_ANY);
206  if (setsockopt(request_socket_, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
207  {
208  throw art::Exception(art::errors::Configuration) <<
209  "CommandableFragmentGenerator: Unable to join multicast group" << std::endl;
210  exit(1);
211  }
212  }
213 }
214 
216 {
217  force_stop_ = true;
218  should_stop_ = true;
219  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining dataThread" << TLOG_ENDL;
220  if (dataThread_.joinable()) dataThread_.join();
221  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining monitoringThread" << TLOG_ENDL;
222  if (monitoringThread_.joinable()) monitoringThread_.join();
223  TLOG_DEBUG("CommandableFragmentGenerator") << "Joining requestThread" << TLOG_ENDL;
224  if (requestThread_.joinable()) requestThread_.join();
225  if (request_socket_ != -1) close(request_socket_);
226 }
227 
229 {
230  bool result = true;
231 
232  if (check_stop()) usleep(sleep_on_stop_us_);
233  if (exception() || force_stop_) return false;
234 
235  if (!useMonitoringThread_ && monitoringInterval_ > 0)
236  {
237  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Checking whether to collect Monitoring Data" << TLOG_ENDL;
238  auto now = std::chrono::steady_clock::now();
239  if (std::chrono::duration_cast<std::chrono::microseconds>(now - lastMonitoringCall_).count() >= monitoringInterval_)
240  {
241  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: Collecting Monitoring Data" << TLOG_ENDL;
242  isHardwareOK_ = checkHWStatus_();
243  TLOG_ARB(10, "CommandableFragmentGenerator") << "getNext: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
244  lastMonitoringCall_ = now;
245  }
246  }
247 
248  try
249  {
250  std::lock_guard<std::mutex> lk(mutex_);
251  if (useDataThread_)
252  {
253  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling applyRequests" << TLOG_ENDL;
254  result = applyRequests(output);
255  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with applyRequests" << TLOG_ENDL;
256 
257  if (exception())
258  {
259  throw cet::exception("CommandableFragmentGenerator") << "Exception found in BoardReader with board ID " << board_id() << "; BoardReader will now return error status when queried";
260  }
261  }
262  else
263  {
264  if (!isHardwareOK_)
265  {
266  TLOG_ERROR("CommandableFragmentGenerator") << "Stopping CFG because the hardware reports bad status!" << TLOG_ENDL;
267  return false;
268  }
269  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Calling getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
270  try
271  {
272  result = getNext_(output);
273  }
274  catch (...)
275  {
276  throw;
277  }
278  TLOG_TRACE("CommandableFragmentGenerator") << "getNext: Done with getNext_ " << std::to_string(ev_counter()) << TLOG_ENDL;
279  }
280  }
281  catch (const cet::exception& e)
282  {
283  latest_exception_report_ = "cet::exception caught in getNext(): ";
284  latest_exception_report_.append(e.what());
285  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: cet::exception caught: " << e << TLOG_ENDL;
286  set_exception(true);
287  return false;
288  }
289  catch (const boost::exception& e)
290  {
291  latest_exception_report_ = "boost::exception caught in getNext(): ";
292  latest_exception_report_.append(boost::diagnostic_information(e));
293  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: boost::exception caught: " << boost::diagnostic_information(e) << TLOG_ENDL;
294  set_exception(true);
295  return false;
296  }
297  catch (const std::exception& e)
298  {
299  latest_exception_report_ = "std::exception caught in getNext(): ";
300  latest_exception_report_.append(e.what());
301  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: std::exception caught: " << e.what() << TLOG_ENDL;
302  set_exception(true);
303  return false;
304  }
305  catch (...)
306  {
307  latest_exception_report_ = "Unknown exception caught in getNext().";
308  TLOG_ERROR("CommandableFragmentGenerator") << "getNext: unknown exception caught" << TLOG_ENDL;
309  set_exception(true);
310  return false;
311  }
312 
313  if (!result)
314  {
315  TLOG_DEBUG("getNext") << "stopped " << TLOG_ENDL;
316  }
317 
318  return result;
319 }
320 
322 {
323  TLOG_ARB(14, "CommandableFragmentGeneraotr") << "CFG::check_stop: should_stop=" << should_stop() << ", useDataThread_=" << useDataThread_ << ", requests_.size()=" << std::to_string(requests_.size()) << ", exception status =" << int(exception()) << TLOG_ENDL;
324 
325  if (!should_stop()) return false;
326  if (!useDataThread_ || mode_ == RequestMode::Ignored) return true;
327  if (force_stop_) return true;
328 
329  if (!request_received_)
330  {
331  TLOG_ERROR("CommandableFragmentGenerator") << "Stop request received by request-based CommandableFragmentGenerator, but no requests have been received." << std::endl
332  << "Check that UDP port " << request_port_ << " is open in the firewall config." << TLOG_ENDL;
333  return true;
334  }
335 
336  if (!request_stop_requested_) return false;
337 
338  auto dur = std::chrono::steady_clock::now() - request_stop_timeout_;
339  return std::chrono::duration_cast<std::chrono::milliseconds>(dur).count() > static_cast<int>(end_of_run_timeout_ms_);// && requests_.size() == 0;
340 }
341 
343 {
344  if (fragment_ids_.size() != 1)
345  {
346  throw cet::exception("Error in CommandableFragmentGenerator: can't call fragment_id() unless member fragment_ids_ vector is length 1");
347  }
348  else
349  {
350  return fragment_ids_[0];
351  }
352 }
353 
355 {
356  if (force || mode_ == RequestMode::Ignored)
357  {
358  return ev_counter_.fetch_add(step);
359  }
360  return ev_counter_.load();
361 } // returns the prev value
362 
363 void artdaq::CommandableFragmentGenerator::StartCmd(int run, uint64_t timeout, uint64_t timestamp)
364 {
365  if (run < 0) throw cet::exception("CommandableFragmentGenerator") << "negative run number";
366 
367  timeout_ = timeout;
368  timestamp_ = timestamp;
369  ev_counter_.store(1);
370  should_stop_.store(false);
371  exception_.store(false);
372  run_number_ = run;
373  subrun_number_ = 1;
374  latest_exception_report_ = "none";
375  dataBuffer_.clear();
376  requests_.clear();
377 
378  start();
379 
380  std::unique_lock<std::mutex> lk(mutex_);
381  if (useDataThread_) startDataThread();
382  if (useMonitoringThread_) startMonitoringThread();
383  if (mode_ != RequestMode::Ignored) startRequestReceiverThread();
384 }
385 
386 void artdaq::CommandableFragmentGenerator::StopCmd(uint64_t timeout, uint64_t timestamp)
387 {
388  TLOG_DEBUG("CommandableFragmentGenerator") << "Stop Command received." << TLOG_ENDL;
389 
390  timeout_ = timeout;
391  timestamp_ = timestamp;
392 
393  stopNoMutex();
394  should_stop_.store(true);
395  std::unique_lock<std::mutex> lk(mutex_);
396  stop();
397 }
398 
399 void artdaq::CommandableFragmentGenerator::PauseCmd(uint64_t timeout, uint64_t timestamp)
400 {
401  timeout_ = timeout;
402  timestamp_ = timestamp;
403 
404  pauseNoMutex();
405  should_stop_.store(true);
406  std::unique_lock<std::mutex> lk(mutex_);
407 
408  pause();
409 }
410 
411 void artdaq::CommandableFragmentGenerator::ResumeCmd(uint64_t timeout, uint64_t timestamp)
412 {
413  timeout_ = timeout;
414  timestamp_ = timestamp;
415 
416  subrun_number_ += 1;
417  should_stop_ = false;
418 
419  dataBuffer_.clear();
420  requests_.clear();
421 
422  // no lock required: thread not started yet
423  resume();
424 
425  std::unique_lock<std::mutex> lk(mutex_);
426  if (useDataThread_) startDataThread();
427  if (useMonitoringThread_) startMonitoringThread();
428  if (mode_ != RequestMode::Ignored) startRequestReceiverThread();
429 }
430 
431 std::string artdaq::CommandableFragmentGenerator::ReportCmd(std::string const& which)
432 {
433  std::lock_guard<std::mutex> lk(mutex_);
434 
435  // 14-May-2015, KAB: please see the comments associated with the report()
436  // methods in the CommandableFragmentGenerator.hh file for more information
437  // on the use of those methods in this method.
438 
439  // check if the child class has something meaningful for this request
440  std::string childReport = reportSpecific(which);
441  if (childReport.length() > 0) { return childReport; }
442 
443  // handle the requests that we can take care of at this level
444  if (which == "latest_exception")
445  {
446  return latest_exception_report_;
447  }
448 
449  // check if the child class has provided a catch-all report function
450  childReport = report();
451  if (childReport.length() > 0) { return childReport; }
452 
453  // if we haven't been able to come up with any report so far, say so
454  std::string tmpString = "The \"" + which + "\" command is not ";
455  tmpString.append("currently supported by the ");
456  tmpString.append(metricsReportingInstanceName());
457  tmpString.append(" fragment generator.");
458  return tmpString;
459 }
460 
461 // Default implementations of state functions
// Default pauseNoMutex(): does nothing at run time. PauseCmd() calls this
// before it takes mutex_. The #pragma message only prints a note while this
// file is being compiled.
462 void artdaq::CommandableFragmentGenerator::pauseNoMutex()
463 {
464 #pragma message "Using default implementation of CommandableFragmentGenerator::pauseNoMutex()"
465 }
466 
// Default pause(): does nothing at run time. PauseCmd() calls this while
// holding mutex_. The #pragma message only prints a note while this file is
// being compiled.
467 void artdaq::CommandableFragmentGenerator::pause()
468 {
469 #pragma message "Using default implementation of CommandableFragmentGenerator::pause()"
470 }
471 
// Default resume(): does nothing at run time. ResumeCmd() calls this before
// it restarts the worker threads. The #pragma message only prints a note
// while this file is being compiled.
472 void artdaq::CommandableFragmentGenerator::resume()
473 {
474 #pragma message "Using default implementation of CommandableFragmentGenerator::resume()"
475 }
476 
477 std::string artdaq::CommandableFragmentGenerator::report()
478 {
479 #pragma message "Using default implementation of CommandableFragmentGenerator::report()"
480  return "";
481 }
482 
483 std::string artdaq::CommandableFragmentGenerator::reportSpecific(std::string const&)
484 {
485 #pragma message "Using default implementation of CommandableFragmentGenerator::reportSpecific(std::string)"
486  return "";
487 }
488 
// Default hardware check: always reports the hardware as healthy (true).
// The result is stored in isHardwareOK_ by getNext() and
// getMonitoringDataLoop(). The #pragma message only prints a note while this
// file is being compiled.
489 bool artdaq::CommandableFragmentGenerator::checkHWStatus_()
490 {
491 #pragma message "Using default implementation of CommandableFragmentGenerator::checkHWStatus_()"
492  return true;
493 }
494 
496 {
497  if (dataThread_.joinable()) dataThread_.join();
498  TLOG_INFO("CommandableFragmentGenerator") << "Starting Data Receiver Thread" << TLOG_ENDL;
499  dataThread_ = std::thread(&CommandableFragmentGenerator::getDataLoop, this);
500 }
501 
503 {
504  if (monitoringThread_.joinable()) monitoringThread_.join();
505  TLOG_INFO("CommandableFragmentGenerator") << "Starting Hardware Monitoring Thread" << TLOG_ENDL;
506  monitoringThread_ = std::thread(&CommandableFragmentGenerator::getMonitoringDataLoop, this);
507 }
508 
510 {
511  if (requestThread_.joinable()) requestThread_.join();
512  TLOG_INFO("CommandableFragmentGenerator") << "Starting Request Reception Thread" << TLOG_ENDL;
513  requestThread_ = std::thread(&CommandableFragmentGenerator::receiveRequestsLoop, this);
514 }
515 
517 {
518  switch (mode_)
519  {
520  case RequestMode::Single:
521  return "Single";
522  case RequestMode::Buffer:
523  return "Buffer";
524  case RequestMode::Window:
525  return "Window";
526  case RequestMode::Ignored:
527  return "Ignored";
528  }
529 
530  return "ERROR";
531 }
532 
534 {
535  data_thread_running_ = true;
536  while (!force_stop_)
537  {
538  if (!isHardwareOK_)
539  {
540  TLOG_DEBUG("CommandableFragmentGenerator") << "getDataLoop: isHardwareOK is " << isHardwareOK_ << ", aborting data thread" << TLOG_ENDL;
541  data_thread_running_ = false;
542  return;
543  }
544 
545  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: calling getNext_" << TLOG_ENDL;
546 
547  bool data = false;
548  auto startdata = std::chrono::steady_clock::now();
549 
550  try
551  {
552  data = getNext_(newDataBuffer_);
553  }
554  catch (...)
555  {
556  ExceptionHandler(ExceptionHandlerRethrow::no,
557  "Exception thrown by fragment generator in CommandableFragmentGenerator::getDataLoop; setting exception state to \"true\"");
558  set_exception(true);
559 
560  data_thread_running_ = false;
561  return;
562  }
563 
564  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: checking buffer size" << TLOG_ENDL;
565  auto startwait = std::chrono::steady_clock::now();
566 
567  if (newDataBuffer_.size() == 0 && sleep_on_no_data_us_ > 0)
568  {
569  usleep(sleep_on_no_data_us_);
570  }
571  if (metricMan)
572  {
573  metricMan->sendMetric("Avg Data Acquisition Time", std::chrono::duration_cast<artdaq::TimeUtils::seconds>(startwait - startdata).count(), "s", 3, artdaq::MetricMode::Average);
574  }
575 
576  auto first = true;
577  auto lastwaittime = 0;
578  while (dataBufferIsTooLarge())
579  {
580  if (should_stop())
581  {
582  TLOG_DEBUG("CommandableFragmentGenerator") << "Run ended while waiting for buffer to shrink!" << TLOG_ENDL;
583  std::unique_lock<std::mutex> lock(dataBufferMutex_);
584  getDataBufferStats();
585  dataCondition_.notify_all();
586  data_thread_running_ = false;
587  return;
588  }
589  auto waittime = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - startwait).count();
590 
591  if (first || (waittime != lastwaittime && waittime % 1000 == 0))
592  {
593  TLOG_WARNING("CommandableFragmentGenerator") << "Bad Omen: Data Buffer has exceeded its size limits. Check the connection between the BoardReader and the EventBuilders! (seq_id=" << ev_counter() << ")" << TLOG_ENDL;
594  first = false;
595  }
596  if (waittime % 5 && waittime != lastwaittime)
597  {
598  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: Data Retreival paused for " << std::to_string(waittime) << " ms waiting for data buffer to drain" << TLOG_ENDL;
599  }
600  lastwaittime = waittime;
601  usleep(1000);
602  }
603 
604  TLOG_ARB(13, "CommandableFragmentGenerator") << "getDataLoop: processing data" << TLOG_ENDL;
605  if (data && !force_stop_)
606  {
607  std::unique_lock<std::mutex> lock(dataBufferMutex_);
608  switch (mode_)
609  {
610  case RequestMode::Single:
611  // While here, if for some strange reason more than one event's worth of data is returned from getNext_...
612  while (newDataBuffer_.size() >= fragment_ids_.size())
613  {
614  dataBuffer_.clear();
615  auto it = newDataBuffer_.begin();
616  std::advance(it, fragment_ids_.size());
617  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_, newDataBuffer_.begin(), it);
618  }
619  break;
620  case RequestMode::Buffer:
621  case RequestMode::Ignored:
622  case RequestMode::Window:
623  default:
624  //dataBuffer_.reserve(dataBuffer_.size() + newDataBuffer_.size());
625  dataBuffer_.splice(dataBuffer_.end(), newDataBuffer_);
626  break;
627  }
628  getDataBufferStats();
629  }
630 
631  {
632  std::unique_lock<std::mutex> lock(dataBufferMutex_);
633  if (dataBuffer_.size() > 0)
634  {
635  dataCondition_.notify_all();
636  }
637  }
638  if (!data || force_stop_)
639  {
640  TLOG_INFO("CommandableFragmentGenerator") << "Data flow has stopped. Ending data collection thread" << TLOG_ENDL;
641  data_thread_running_ = false;
642  return;
643  }
644  }
645 }
646 
648 {
649  return (maxDataBufferDepthFragments_ > 0 && dataBufferDepthFragments_ > maxDataBufferDepthFragments_) || (maxDataBufferDepthBytes_ > 0 && dataBufferDepthBytes_ > maxDataBufferDepthBytes_);
650 }
651 
653 {
655  dataBufferDepthFragments_ = dataBuffer_.size();
656  size_t acc = 0;
657  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Calculating buffer size" << TLOG_ENDL;
658  for (auto i = dataBuffer_.begin(); i != dataBuffer_.end(); ++i)
659  {
660  if (i->get() != nullptr)
661  {
662  acc += (*i)->sizeBytes();
663  }
664  }
665  dataBufferDepthBytes_ = acc;
666 
667  if (metricMan)
668  {
669  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: Sending Metrics" << TLOG_ENDL;
670  metricMan->sendMetric("Buffer Depth Fragments", dataBufferDepthFragments_.load(), "fragments", 1, MetricMode::LastPoint);
671  metricMan->sendMetric("Buffer Depth Bytes", dataBufferDepthBytes_.load(), "bytes", 1, MetricMode::LastPoint);
672  }
673  TLOG_ARB(15, "CommandableFragmentGenerator") << "getDataBufferStats: frags=" << dataBufferDepthFragments_.load() << "/" << maxDataBufferDepthFragments_
674  << ", sz=" << std::to_string(dataBufferDepthBytes_.load()) << "/" << std::to_string(maxDataBufferDepthBytes_) << TLOG_ENDL;
675 }
676 
678 {
679  std::unique_lock<std::mutex> lock(dataBufferMutex_);
680  dataCondition_.wait_for(lock, std::chrono::milliseconds(10));
681  if (dataBufferDepthFragments_ > 0)
682  {
683  if ((mode_ == RequestMode::Buffer || mode_ == RequestMode::Window))
684  {
685  // Eliminate extra fragments
686  while (dataBufferIsTooLarge())
687  {
688  dataBuffer_.erase(dataBuffer_.begin());
689  getDataBufferStats();
690  }
691  if (dataBuffer_.size() > 0)
692  {
693  TLOG_ARB(17, "CommandableFragmentGenerator") << "Determining if Fragments can be dropped from data buffer" << TLOG_ENDL;
694  Fragment::timestamp_t last = dataBuffer_.back()->timestamp();
695  Fragment::timestamp_t min = last > staleTimeout_ ? last - staleTimeout_ : 0;
696  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
697  {
698  if ((*it)->timestamp() < min)
699  {
700  it = dataBuffer_.erase(it);
701  }
702  else
703  {
704  ++it;
705  }
706  }
707  getDataBufferStats();
708  }
709  }
710  else if (mode_ == RequestMode::Single && dataBuffer_.size() > fragment_ids_.size())
711  {
712  // Eliminate extra fragments
713  while (dataBuffer_.size() > fragment_ids_.size())
714  {
715  dataBuffer_.erase(dataBuffer_.begin());
716  }
717  }
718  }
719 }
720 
722 {
723  while (!force_stop_)
724  {
725  if (should_stop() || monitoringInterval_ <= 0)
726  {
727  TLOG_DEBUG("CommandableFragmentGenerator") << "getMonitoringDataLoop: should_stop() is " << std::boolalpha << should_stop()
728  << " and monitoringInterval is " << monitoringInterval_ << ", returning" << TLOG_ENDL;
729  return;
730  }
731  TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: Determining whether to call checkHWStatus_" << TLOG_ENDL;
732 
733  auto now = std::chrono::steady_clock::now();
734  if (std::chrono::duration_cast<std::chrono::microseconds>(now - lastMonitoringCall_).count() >= monitoringInterval_)
735  {
736  isHardwareOK_ = checkHWStatus_();
737  TLOG_ARB(12, "CommandableFragmentGenerator") << "getMonitoringDataLoop: isHardwareOK_ is now " << std::boolalpha << isHardwareOK_ << TLOG_ENDL;
738  lastMonitoringCall_ = now;
739  }
740  usleep(monitoringInterval_ / 10);
741  }
742 }
743 
745 {
746  while (!force_stop_)
747  {
748  if (check_stop() || !isHardwareOK_ || exception())
749  {
750  TLOG_DEBUG("CommandableFragmentGenerator") << "receiveRequestsLoop: check_stop is " << std::boolalpha << check_stop()
751  << ", isHardwareOK_ is " << isHardwareOK_ << ", and exception state is " << exception() << ", aborting request reception thread." << TLOG_ENDL;
752  return;
753  }
754 
755  // Don't listen for requests when we're going to ignore them anyway
756  if (mode_ == RequestMode::Ignored) return;
757  TLOG_ARB(16, "CommandableFragmentGenerator") << "receiveRequestsLoop: Polling Request socket for new requests" << TLOG_ENDL;
758 
759  int ms_to_wait = 1000;
760  struct pollfd ufds[1];
761  ufds[0].fd = request_socket_;
762  ufds[0].events = POLLIN | POLLPRI;
763  int rv = poll(ufds, 1, ms_to_wait);
764  if (rv > 0)
765  {
766  if (ufds[0].revents == POLLIN || ufds[0].revents == POLLPRI)
767  {
768  TLOG_ARB(11, "CommandableFragmentGenerator") << "Recieved packet on Request channel" << TLOG_ENDL;
769  detail::RequestHeader hdr_buffer;
770  recv(request_socket_, &hdr_buffer, sizeof(hdr_buffer), 0);
771  TLOG_ARB(11, "CommandableFragmentGenerator") << "Request header word: 0x" << std::hex << hdr_buffer.header << TLOG_ENDL;
772  if (hdr_buffer.isValid())
773  {
774  request_received_ = true;
775  if (hdr_buffer.mode == detail::RequestMessageMode::EndOfRun)
776  {
777  TLOG_INFO("CommandableFragmentGenerator") << "Received Request Message with the EndOfRun marker. (Re)Starting 1-second timeout for receiving all outstanding requests..." << TLOG_ENDL;
778  request_stop_timeout_ = std::chrono::steady_clock::now();
779  request_stop_requested_ = true;
780  }
781  std::vector<detail::RequestPacket> pkt_buffer(hdr_buffer.packet_count);
782  recv(request_socket_, &pkt_buffer[0], sizeof(detail::RequestPacket) * hdr_buffer.packet_count, 0);
783  bool anyNew = false;
784  for (auto& buffer : pkt_buffer)
785  {
786  if (!buffer.isValid()) continue;
787  if (requests_.count(buffer.sequence_id) && requests_[buffer.sequence_id] != buffer.timestamp)
788  {
789  TLOG_ERROR("CommandableFragmentGenerator") << "Received conflicting request for SeqID "
790  << std::to_string(buffer.sequence_id) << "!"
791  << " Old ts=" << std::to_string(requests_[buffer.sequence_id])
792  << ", new ts=" << std::to_string(buffer.timestamp) << ". Keeping OLD!" << TLOG_ENDL;
793  }
794  else if (!requests_.count(buffer.sequence_id))
795  {
796  int delta = buffer.sequence_id - ev_counter();
797  TLOG_ARB(11, "CommandableFragmentGenerator") << "Recieved request for sequence ID " << std::to_string(buffer.sequence_id)
798  << " and timestamp " << std::to_string(buffer.timestamp) << " (delta: " << delta << ")" << TLOG_ENDL;
799  if (delta < 0)
800  {
801  TLOG_ARB(11, "CommandableFragmentGenerator") << "Already serviced this request! Ignoring..." << TLOG_ENDL;
802  }
803  else
804  {
805  std::unique_lock<std::mutex> tlk(request_mutex_);
806  requests_[buffer.sequence_id] = buffer.timestamp;
807  anyNew = true;
808  }
809  }
810  }
811  if (anyNew)
812  {
813  std::unique_lock<std::mutex> lock(request_mutex_);
814  requestCondition_.notify_all();
815  }
816  }
817  }
818  }
819  }
820 }
821 
823 {
824  if (check_stop() || exception())
825  {
826  return false;
827  }
828 
829  if (mode_ == RequestMode::Ignored)
830  {
831  while (dataBufferDepthFragments_ <= 0)
832  {
833  if (check_stop() || exception() || !isHardwareOK_) return false;
834  std::unique_lock<std::mutex> lock(dataBufferMutex_);
835  dataCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return dataBufferDepthFragments_ > 0; });
836  }
837  }
838  else
839  {
840  if ((check_stop() && requests_.size() <= 0) || exception()) return false;
841  checkDataBuffer();
842 
843  while (requests_.size() <= 0)
844  {
845  if (check_stop() || exception()) return false;
846 
847  checkDataBuffer();
848 
849  std::unique_lock<std::mutex> lock(request_mutex_);
850  requestCondition_.wait_for(lock, std::chrono::milliseconds(10), [this]() { return requests_.size() > 0; });
851  }
852  }
853 
854  {
855  std::unique_lock<std::mutex> dlk(dataBufferMutex_);
856  std::unique_lock<std::mutex> rlk(request_mutex_);
857 
858 
859  if (mode_ == RequestMode::Ignored)
860  {
861  // We just copy everything that's here into the output.
862  TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Ignored; Copying data to output" << TLOG_ENDL;
863  std::move(dataBuffer_.begin(), dataBuffer_.end(), std::inserter(frags, frags.end()));
864  dataBuffer_.clear();
865  }
866  else if (mode_ == RequestMode::Single)
867  {
868  // We only care about the latest request received. Send empties for all others.
869  sendEmptyFragments(frags);
870 
871  if (dataBuffer_.size() > 0)
872  {
873  TLOG_ARB(9, "CommandableFragmentGenerator") << "Mode is Single; Sending copy of last event" << TLOG_ENDL;
874  for (auto& fragptr : dataBuffer_)
875  {
876  // Return the latest data point
877  auto frag = fragptr.get();
878  auto newfrag = std::unique_ptr<artdaq::Fragment>(new Fragment(ev_counter(), frag->fragmentID()));
879  newfrag->resize(frag->size() - detail::RawFragmentHeader::num_words());
880  memcpy(newfrag->headerAddress(), frag->headerAddress(), frag->sizeBytes());
881  newfrag->setTimestamp(requests_[ev_counter()]);
882  newfrag->setSequenceID(ev_counter());
883  frags.push_back(std::move(newfrag));
884  }
885  }
886  else
887  {
888  sendEmptyFragment(frags, ev_counter(), "No data for");
889  }
890  requests_.clear();
891  ev_counter_inc(1, true);
892  }
893  else if (mode_ == RequestMode::Buffer || mode_ == RequestMode::Window)
894  {
895  if (mode_ == RequestMode::Buffer || static_cast<size_t>(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count()) > missing_request_window_timeout_us_)
896  {
897  if (mode_ == RequestMode::Window)
898  {
899  TLOG_ERROR("CommandableFragmentGenerator") << "Data-taking has paused for " << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count() << " us "
900  << "(> " << std::to_string(missing_request_window_timeout_us_) << " us) while waiting for missing data request messages." << " Sending Empty Fragments for missing requests!" << TLOG_ENDL;
901  } // else, Buffer mode, where it only makes sense to send for the last request
902  sendEmptyFragments(frags);
903  }
904  for (auto req = requests_.begin(); req != requests_.end();)
905  {
906  auto ts = req->second;
907  if (req->first < ev_counter())
908  {
909  req = requests_.erase(req);
910  continue;
911  }
912  while (req->first > ev_counter() && request_stop_requested_ && std::chrono::duration_cast<std::chrono::seconds>(std::chrono::steady_clock::now() - request_stop_timeout_).count() > 1)
913  {
914  sendEmptyFragment(frags, ev_counter(), "Missing request for");
915  ev_counter_inc(1, true);
916  }
917  if (req->first > ev_counter())
918  {
919  ++req;
920  continue; // Will loop through all requests, means we're in Window mode and missing the correct one
921  }
922  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Checking that data exists for request window " << std::to_string(req->first) << " (Buffered mode will always succeed)" << TLOG_ENDL;
923  Fragment::timestamp_t min = ts > windowOffset_ ? ts - windowOffset_ : 0;
924  Fragment::timestamp_t max = min + windowWidth_;
925  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: min is " << std::to_string(min) << ", max is " << std::to_string(max)
926  << " and last point in buffer is " << std::to_string((dataBuffer_.size() > 0 ? dataBuffer_.back()->timestamp() : 0)) << " (sz=" << std::to_string(dataBuffer_.size()) << ")" << TLOG_ENDL;
927  bool windowClosed = mode_ != RequestMode::Window || (dataBuffer_.size() > 0 && dataBuffer_.back()->timestamp() >= max);
928  bool windowTimeout = static_cast<size_t>(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count()) > window_close_timeout_us_;
929  if (windowTimeout)
930  {
931  TLOG_WARNING("CommandableFragmentGenerator") << "A timeout occurred waiting for data to close the request window (max=" << std::to_string(max) << ", buffer=" << std::to_string(dataBuffer_.back()->timestamp()) << "). Time waiting: "
932  << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - last_window_send_time_).count() << " us "
933  << "(> " << std::to_string(window_close_timeout_us_) << " us)." << TLOG_ENDL;
934  }
935  if (windowClosed || !data_thread_running_ || windowTimeout)
936  {
937  TLOG_DEBUG("CommandableFragmentGenerator") << "Creating ContainerFragment for Buffered or Window-requested Fragments" << TLOG_ENDL;
938  frags.emplace_back(new artdaq::Fragment(ev_counter(), fragment_id()));
939  frags.back()->setTimestamp(ts);
940  ContainerFragmentLoader cfl(*frags.back());
941 
942  if (mode_ == RequestMode::Window && !windowClosed) cfl.set_missing_data(true);
943  if (mode_ == RequestMode::Window && dataBuffer_.size() > 0 && dataBuffer_.front()->timestamp() > min)
944  {
945  TLOG_DEBUG("CommandableFragmentGenerator") << "Request Window covers data that is either before data collection began or has fallen off the end of the buffer" << TLOG_ENDL;
946  cfl.set_missing_data(true);
947  }
948 
949  // Buffer mode TFGs should simply copy out the whole dataBuffer_ into a ContainerFragment
950  // Window mode TFGs must do a little bit more work to decide which fragments to send for a given request
951  for (auto it = dataBuffer_.begin(); it != dataBuffer_.end();)
952  {
953  if (mode_ == RequestMode::Window)
954  {
955  Fragment::timestamp_t fragT = (*it)->timestamp();
956  if (fragT < min || fragT > max || (fragT == max && windowWidth_ > 0))
957  {
958  ++it;
959  continue;
960  }
961  }
962 
963  TLOG_ARB(9, "CommandableFragmentGenerator") << "ApplyRequests: Adding Fragment with timestamp " << std::to_string((*it)->timestamp()) << " to Container" << TLOG_ENDL;
964  cfl.addFragment(*it);
965 
966  if (mode_ == RequestMode::Buffer || (mode_ == RequestMode::Window && uniqueWindows_))
967  {
968  it = dataBuffer_.erase(it);
969  }
970  else
971  {
972  ++it;
973  }
974  }
975  req = requests_.erase(req);
976  ev_counter_inc(1, true);
977  last_window_send_time_ = std::chrono::steady_clock::now();
978  }
979  else
980  {
981  // Wait for the window to be closed for the current event
982  break;
983  }
984  }
985  }
986  getDataBufferStats();
987  }
988 
989  if (frags.size() > 0)
990  TLOG_ARB(9, "CommandableFragmentGenerator") << "Finished Processing Event " << std::to_string(ev_counter() + 1) << " for fragment_id " << fragment_id() << "." << TLOG_ENDL;
991  return true;
992 }
993 
994 bool artdaq::CommandableFragmentGenerator::sendEmptyFragment(artdaq::FragmentPtrs& frags, size_t seqId, std::string desc)
995 {
996  TLOG_WARNING("CommandableFragmentGenerator") << desc << " request " << seqId << ", sending empty fragment" << TLOG_ENDL;
997  for (auto fid : fragment_ids_)
998  {
999  auto frag = new Fragment();
1000  frag->setSequenceID(seqId);
1001  frag->setFragmentID(fid);
1002  frag->setSystemType(Fragment::EmptyFragmentType);
1003  frags.emplace_back(FragmentPtr(frag));
1004  }
1005  return true;
1006 }
1007 
1009 {
1010  auto sequence_id = Fragment::InvalidSequenceID;
1011  auto timestamp = Fragment::InvalidTimestamp;
1012  // Map is ordered by sequence ID!
1013  TLOG_ARB(19, "CommandableFragmentGenerator") << "Sending Empty Fragments" << TLOG_ENDL;
1014  for (auto it = requests_.begin(); it != requests_.end();)
1015  {
1016  auto seq = it->first;
1017  auto ts = it->second;
1018 
1019  while (seq > ev_counter())
1020  {
1021  // Otherwise, this is just one we missed, send an empty
1022  sendEmptyFragment(frags, ev_counter(), "Missed request for");
1023  ev_counter_inc(1, true);
1024  }
1025 
1026  // Check if this is the one "true" request
1027  if (++it == requests_.end())
1028  {
1029  sequence_id = seq;
1030  timestamp = ts;
1031  break;
1032  }
1033  if (seq < ev_counter()) continue;
1034 
1035  }
1036  requests_.clear();
1037 
1038  if (sequence_id < ev_counter()) return; // No new requests received.
1039  requests_[sequence_id] = timestamp;
1040 }
int fragment_id() const
Get the current Fragment ID, if there is only one.
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
Definition: TCPConnect.cc:29
virtual ~CommandableFragmentGenerator()
CommandableFragmentGenerator Destructor.
bool sendEmptyFragment(FragmentPtrs &frags, size_t sequenceId, std::string desc)
Send an EmptyFragmentType Fragment.
void getMonitoringDataLoop()
This function regularly calls checkHWStatus_(), and sets the isHardwareOK flag accordingly.
void startDataThread()
Function that launches the data thread (getDataLoop())
std::string ReportCmd(std::string const &which="")
Get a report about a user-specified run-time quantity.
RequestMessageMode mode
Communicates additional information to the Request receiver.
bool isValid() const
Check the magic bytes of the packet.
bool dataBufferIsTooLarge()
Test the configured constraints on the data buffer.
void StopCmd(uint64_t timeout, uint64_t timestamp)
Stop the CommandableFragmentGenerator.
void StartCmd(int run, uint64_t timeout, uint64_t timestamp)
Start the CommandableFragmentGenerator.
bool check_stop()
Routine used by applyRequests to make sure that all outstanding requests have been fulfilled before r...
void ResumeCmd(uint64_t timeout, uint64_t timestamp)
Resume the CommandableFragmentGenerator.
CommandableFragmentGenerator()
CommandableFragmentGenerator default constructor.
bool getNext(FragmentPtrs &output) override final
getNext calls either applyRequests or getNext_ to get any data that is ready to be sent to the EventB...
size_t ev_counter_inc(size_t step=1, bool force=false)
Increment the event counter, if the current RequestMode allows it.
Header of a RequestMessage. Contains magic bytes for validation and a count of expected RequestPacket...
uint32_t packet_count
The number of RequestPackets in this Request message.
void PauseCmd(uint64_t timeout, uint64_t timestamp)
Pause the CommandableFragmentGenerator.
void getDataLoop()
When separate_data_thread is set to true, this loop repeatedly calls getNext_ and adds returned Fragm...
The RequestPacket contains information about a single data request.
void startRequestReceiverThread()
Function that launches the data request receiver thread (receiveRequestsLoop())
void startMonitoringThread()
Function that launches the monitoring thread (getMonitoringDataLoop())
void checkDataBuffer()
Perform data buffer pruning operations. If the RequestMode is Single, removes all but the latest Frag...
std::string printMode_()
Return the string representation of the current RequestMode.
void sendEmptyFragments(FragmentPtrs &frags)
This function is for Buffered and Single request modes, as they can only respond to one data request ...
void getDataBufferStats()
Calculate the size of the dataBuffer and report appropriate metrics.
bool applyRequests(FragmentPtrs &output)
See if any requests have been received, and add the corresponding data Fragment objects to the output...
void receiveRequestsLoop()
This function receives data request packets, adding new requests to the request list.
void setupRequestListener()
Opens the socket used to listen for data requests.