artdaq  v3_00_03
SharedMemoryEventManager.cc
1 
2 #define TRACE_NAME (app_name + "_SharedMemoryEventManager").c_str()
3 
4 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
5 #include "artdaq-core/Core/StatisticsCollection.hh"
6 #include "artdaq-core/Utilities/TraceLock.hh"
7 #include <sys/wait.h>
8 
9 std::mutex artdaq::SharedMemoryEventManager::sequence_id_mutex_;
10 
11 artdaq::SharedMemoryEventManager::SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
12  : SharedMemoryManager(pset.get<uint32_t>("shared_memory_key", 0xBEE70000 + getpid()),
13  pset.get<size_t>("buffer_count"),
14  pset.has_key("max_event_size_bytes") ? pset.get<size_t>("max_event_size_bytes") : pset.get<size_t>("expected_fragments_per_event") * pset.get<size_t>("max_fragment_size_bytes"),
15  pset.get<size_t>("stale_buffer_timeout_usec", pset.get<size_t>("event_queue_wait_time", 5) * 1000000),
16  !pset.get<bool>("broadcast_mode", false))
17  , num_art_processes_(pset.get<size_t>("art_analyzer_count", 1))
18  , num_fragments_per_event_(pset.get<size_t>("expected_fragments_per_event"))
19  , queue_size_(pset.get<size_t>("buffer_count"))
20  , run_id_(0)
21  , subrun_id_(0)
22  , update_run_ids_(pset.get<bool>("update_run_ids_on_new_fragment", true))
23  , overwrite_mode_(!pset.get<bool>("use_art", true) || pset.get<bool>("overwrite_mode", false) || pset.get<bool>("broadcast_mode", false))
24  , send_init_fragments_(pset.get<bool>("send_init_fragments", true))
25  , buffer_writes_pending_()
26  , incomplete_event_report_interval_ms_(pset.get<int>("incomplete_event_report_interval_ms", -1))
27  , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
28  , broadcast_timeout_ms_(pset.get<int>("fragment_broadcast_timeout_ms", 3000))
29  , broadcast_count_(0)
30  , subrun_event_count_(0)
31  , art_processes_()
32  , restart_art_(false)
33  , current_art_pset_(art_pset)
34  , requests_(pset)
35  , broadcasts_(pset.get<uint32_t>("broadcast_shared_memory_key", 0xCEE70000 + getpid()),
36  pset.get<size_t>("broadcast_buffer_count", 10),
37  pset.get<size_t>("broadcast_buffer_size", 0x100000),
38  pset.get<int>("fragment_broadcast_timeout_ms", 3000) * 1000, false)
39 {
40  SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
41  broadcasts_.SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
42 
43  if (pset.get<bool>("use_art", true) == false) {
44  TLOG(TLVL_INFO) << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false" << TLOG_ENDL;
45  num_art_processes_ = 0;
46  }
47  else {
48  TLOG(TLVL_INFO) << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true" << TLOG_ENDL;
49  TLOG(TLVL_TRACE) << "art_pset is " << art_pset.to_string() << TLOG_ENDL;
50  }
51  current_art_config_file_ = std::make_shared<art_config_file>(art_pset/*, GetKey(), GetBroadcastKey()*/);
52 
53  if (overwrite_mode_ && num_art_processes_ > 0)
54  {
55  TLOG(TLVL_WARNING) << "Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!" << TLOG_ENDL;
56  }
57  else if (overwrite_mode_)
58  {
59  TLOG(TLVL_INFO) << "Overwrite Mode enabled, no configured art processes at startup" << TLOG_ENDL;
60  }
61 
62  for (size_t ii = 0; ii < size(); ++ii)
63  {
64  buffer_writes_pending_[ii] = 0;
65  }
66 
67  if (!IsValid()) throw cet::exception(app_name + "_SharedMemoryEventManager") << "Unable to attach to Shared Memory!";
68 
69  TLOG(TLVL_TRACE) << "Setting Writer rank to " << my_rank << TLOG_ENDL;
70  SetRank(my_rank);
71  TLOG(TLVL_DEBUG) << "Writer Rank is " << GetRank() << TLOG_ENDL;
72 
73 
74  TLOG(TLVL_TRACE) << "END CONSTRUCTOR" << TLOG_ENDL;
75 }
76 
78 {
79  TLOG(TLVL_TRACE) << "DESTRUCTOR" << TLOG_ENDL;
80  endOfData();
81  TLOG(TLVL_TRACE) << "Destructor END" << TLOG_ENDL;
82 }
83 
84 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag, void* dataPtr)
85 {
86  TLOG(TLVL_TRACE) << "AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
87  << ", sequence_id=" << std::to_string(frag.sequence_id) << TLOG_ENDL;
88  auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
89  TLOG(TLVL_TRACE) << "Using buffer " << std::to_string(buffer) << TLOG_ENDL;
90  if (buffer == -1) return false;
91  if (buffer == -2)
92  {
93  TLOG(TLVL_ERROR) << "Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
94  return true;
95  }
96 
97  auto hdr = getEventHeader_(buffer);
98  if (update_run_ids_)
99  {
100  hdr->run_id = run_id_;
101  hdr->subrun_id = subrun_id_;
102  }
103 
104  TLOG(TLVL_TRACE) << "AddFragment before Write calls" << TLOG_ENDL;
105  Write(buffer, dataPtr, frag.word_count * sizeof(RawDataType));
106 
107  TLOG(TLVL_TRACE) << "Checking for complete event" << TLOG_ENDL;
108  auto fragmentCount = GetFragmentCount(frag.sequence_id);
109  hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
110  TLOG(TLVL_TRACE) << "hdr->is_complete=" << std::boolalpha << hdr->is_complete
111  << ", fragmentCount=" << std::to_string(fragmentCount)
112  << ", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
113  << ", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]) << TLOG_ENDL;
114 
115  complete_buffer_(buffer);
116  requests_.SendRequest(true);
117 
118  TLOG(TLVL_TRACE) << "AddFragment END" << TLOG_ENDL;
119  return true;
120 }
121 
122 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag)
123 {
124  TLOG(TLVL_TRACE) << "AddFragment(FragmentPtr) BEGIN" << TLOG_ENDL;
125  auto hdr = *reinterpret_cast<detail::RawFragmentHeader*>(frag->headerAddress());
126  auto data = frag->headerAddress();
127  auto start = std::chrono::steady_clock::now();
128  bool sts = false;
129  while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
130  {
131  sts = AddFragment(hdr, data);
132  if (!sts) usleep(1000);
133  }
134  if (!sts)
135  {
136  outfrag = std::move(frag);
137  }
138  TLOG(TLVL_TRACE) << "AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts << TLOG_ENDL;
139  return sts;
140 }
141 
142 artdaq::RawDataType* artdaq::SharedMemoryEventManager::WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable)
143 {
144  TLOG(14) << "WriteFragmentHeader BEGIN" << TLOG_ENDL;
145  auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
146 
147  if (buffer < 0)
148  {
149  if (buffer == -1 && !dropIfNoBuffersAvailable) return nullptr;
150  if (buffer == -2)
151  {
152  TLOG(TLVL_ERROR) << "Dropping fragment because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
153  }
154  else
155  {
156  TLOG(TLVL_ERROR) << "Dropping fragment because there is no room in the queue and reliable mode is off: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
157  }
158  dropped_data_.reset(new Fragment(frag.word_count - frag.num_words()));
159  return dropped_data_->dataBegin();
160  }
161 
162  buffer_writes_pending_[buffer]++;
163  TraceLock lk(buffer_mutexes_[buffer], 50, "WriteFragmentHeader");
164  Write(buffer, &frag, frag.num_words() * sizeof(RawDataType));
165 
166  auto pos = reinterpret_cast<RawDataType*>(GetWritePos(buffer));
167  if (frag.word_count - frag.num_words() > 0) {
168  IncrementWritePos(buffer, (frag.word_count - frag.num_words()) * sizeof(RawDataType));
169  }
170 
171  TLOG(14) << "WriteFragmentHeader END" << TLOG_ENDL;
172  return pos;
173 
174 }
175 
176 void artdaq::SharedMemoryEventManager::DoneWritingFragment(detail::RawFragmentHeader frag)
177 {
178  TLOG(TLVL_TRACE) << "DoneWritingFragment BEGIN" << TLOG_ENDL;
179  auto buffer = getBufferForSequenceID_(frag.sequence_id, false, frag.timestamp);
180  if (buffer == -1) Detach(true, "SharedMemoryEventManager", "getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
181  if (buffer == -2) return;
182  TraceLock lk(buffer_mutexes_[buffer], 50, "DoneWritingFragment");
183 
184  auto hdr = getEventHeader_(buffer);
185  if (update_run_ids_)
186  {
187  hdr->run_id = run_id_;
188  hdr->subrun_id = subrun_id_;
189  }
190 
191  buffer_writes_pending_[buffer]--;
192  if (buffer_writes_pending_[buffer] != 0)
193  {
194  TLOG(TLVL_TRACE) << "Done writing fragment, but there's another writer. Not doing bookkeeping steps." << TLOG_ENDL;
195  return;
196  }
197  auto frag_count = GetFragmentCount(frag.sequence_id);
198  hdr->is_complete = frag_count == num_fragments_per_event_;
199 #if ART_SUPPORTS_DUPLICATE_EVENTS
200  if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
201  hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
202  }
203 #endif
204 
205  complete_buffer_(buffer);
206  requests_.SendRequest(true);
207  TLOG(TLVL_TRACE) << "DoneWritingFragment END" << TLOG_ENDL;
208 }
209 
210 size_t artdaq::SharedMemoryEventManager::GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type)
211 {
212  return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID, false), type);
213 }
214 
215 size_t artdaq::SharedMemoryEventManager::GetFragmentCountInBuffer(int buffer, Fragment::type_t type)
216 {
217  if (buffer == -1) return 0;
218  ResetReadPos(buffer);
219  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
220 
221  size_t count = 0;
222 
223  while (MoreDataInBuffer(buffer))
224  {
225  auto fragHdr = reinterpret_cast<artdaq::detail::RawFragmentHeader*>(GetReadPos(buffer));
226  IncrementReadPos(buffer, fragHdr->word_count * sizeof(RawDataType));
227  if (type != Fragment::InvalidFragmentType && fragHdr->type != type) continue;
228  TLOG_TRACE("GetFragmentCount") << "Adding Fragment with size=" << std::to_string(fragHdr->word_count) << " to Fragment count" << TLOG_ENDL;
229  ++count;
230  }
231 
232  return count;
233 }
234 
235 void artdaq::SharedMemoryEventManager::RunArt(std::shared_ptr<art_config_file> config_file, pid_t& pid_out)
236 {
237  while (restart_art_)
238  {
239  send_init_frag_();
240  TLOG(TLVL_INFO) << "Starting art process with config file " << config_file->getFileName() << TLOG_ENDL;
241  std::vector<char*> args{ (char*)"art", (char*)"-c", &config_file->getFileName()[0], NULL };
242 
243  auto pid = fork();
244  if (pid == 0)
245  { /* child */
246  execvp("art", &args[0]);
247  exit(1);
248  }
249  pid_out = pid;
250 
251  TLOG(TLVL_INFO) << "PID of new art process is " << pid << TLOG_ENDL;
252  art_processes_.insert(pid);
253  int status;
254  waitpid(pid, &status, 0);
255  TLOG(TLVL_INFO) << "Removing PID " << pid << " from process list" << TLOG_ENDL;
256  art_processes_.erase(pid);
257  if (status == 0)
258  {
259  TLOG(TLVL_INFO) << "art process " << pid << " exited normally, " << (restart_art_ ? "restarting" : "not restarting") << TLOG_ENDL;
260  }
261  else
262  {
263  TLOG(TLVL_WARNING) << "art process " << pid << " exited with status code 0x" << std::hex << status << " (" << std::dec << status << "), " << (restart_art_ ? "restarting" : "not restarting") << TLOG_ENDL;
264  }
265  }
266 }
267 
269 {
270  restart_art_ = true;
271  if (num_art_processes_ == 0) return;
272  for (size_t ii = 0; ii < num_art_processes_; ++ii)
273  {
274  StartArtProcess(current_art_pset_);
275  }
276 }
277 
279 {
280  static std::mutex start_art_mutex;
281  TraceLock lk(start_art_mutex, 15, "StartArtLock");
282  restart_art_ = true;
283  auto initialCount = GetAttachedCount();
284  auto startTime = std::chrono::steady_clock::now();
285 
286  if (pset != current_art_pset_)
287  {
288  current_art_pset_ = pset;
289  current_art_config_file_ = std::make_shared<art_config_file>(pset/*, GetKey(), GetBroadcastKey()*/);
290  }
291  pid_t pid = -1;
292  boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
293  thread.detach();
294 
295 
296  while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
297  && TimeUtils::GetElapsedTime(startTime) < 5)
298  {
299  usleep(1000);
300  }
301  if (GetAttachedCount() - initialCount < 1 || pid <= 0)
302  {
303  TLOG(TLVL_WARNING) << "art process has not started after 5s. Check art configuration!"
304  << " (pid=" << pid << ", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) << ")" << TLOG_ENDL;
305  return 0;
306  }
307  else
308  {
309  TLOG(TLVL_INFO) << std::setw(4) << std::fixed << "art initialization took "
310  << TimeUtils::GetElapsedTime(startTime) << " seconds." << TLOG_ENDL;
311 
312  return pid;
313  }
314 
315 }
316 
318 {
319  restart_art_ = false;
320  current_art_config_file_ = nullptr;
321  current_art_pset_ = fhicl::ParameterSet();
322 
323  for (auto pid : pids)
324  {
325  if (kill(pid, 0) >= 0)
326  {
327  pids.erase(pid);
328  }
329  }
330  if (pids.size() == 0)
331  {
332  TLOG(14) << "All art processes already exited, nothing to do." << TLOG_ENDL;
333  usleep(1000);
334  return;
335  }
336 
337  TLOG(TLVL_TRACE) << "Gently informing art processes that it is time to shut down" << TLOG_ENDL;
338  for (auto pid : pids)
339  {
340  kill(pid, SIGQUIT);
341  }
342 
343  int graceful_wait_ms = 1000;
344  int int_wait_ms = 100;
345 
346  TLOG(TLVL_TRACE) << "Waiting up to " << graceful_wait_ms << " ms for all art processes to exit gracefully" << TLOG_ENDL;
347  for (int ii = 0; ii < graceful_wait_ms; ++ii)
348  {
349  usleep(1000);
350 
351  for (auto pid : pids)
352  {
353  if (kill(pid, 0) < 0)
354  {
355  pids.erase(pid);
356  }
357  }
358  if (pids.size() == 0)
359  {
360  TLOG(TLVL_TRACE) << "All art processes exited after " << ii << " ms." << TLOG_ENDL;
361  return;
362  }
363  }
364 
365  TLOG(TLVL_TRACE) << "Insisting that the art processes shut down" << TLOG_ENDL;
366  for (auto pid : pids)
367  {
368  kill(pid, SIGINT);
369  }
370 
371  TLOG(TLVL_TRACE) << "Waiting up to " << int_wait_ms << " ms for all art processes to exit" << TLOG_ENDL;
372  for (int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
373  {
374  usleep(1000);
375 
376  for (auto pid : pids)
377  {
378  if (kill(pid, 0) < 0)
379  {
380  pids.erase(pid);
381  }
382  }
383 
384  if (pids.size() == 0)
385  {
386  TLOG(TLVL_TRACE) << "All art processes exited after " << ii << " ms." << TLOG_ENDL;
387  return;
388  }
389  }
390 
391  TLOG(TLVL_TRACE) << "Killing remaning art processes with extreme prejudice" << TLOG_ENDL;
392  while (pids.size() > 0)
393  {
394  kill(*pids.begin(), SIGKILL);
395  }
396 }
397 
398 void artdaq::SharedMemoryEventManager::ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun, int n_art_processes)
399 {
400  TLOG(TLVL_DEBUG) << "ReconfigureArt BEGIN" << TLOG_ENDL;
401  if (restart_art_) // Art is running
402  {
403  endOfData();
404  }
405  for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
406  {
407  broadcasts_.MarkBufferEmpty(ii, true);
408  }
409  if (newRun == 0) newRun = run_id_ + 1;
410  current_art_pset_ = art_pset;
411  current_art_config_file_ = std::make_shared<art_config_file>(art_pset/*, GetKey(), GetBroadcastKey()*/);
412 
413  if (n_art_processes != -1)
414  {
415  TLOG(TLVL_INFO) << "Setting number of art processes to " << n_art_processes << TLOG_ENDL;
416  num_art_processes_ = n_art_processes;
417  }
418  startRun(newRun);
419  TLOG(TLVL_DEBUG) << "ReconfigureArt END" << TLOG_ENDL;
420 }
421 
423 {
424  init_fragment_.reset(nullptr);
425  TLOG(TLVL_TRACE) << "SharedMemoryEventManager::endOfData" << TLOG_ENDL;
426  restart_art_ = false;
427 
428  size_t initialStoreSize = GetIncompleteEventCount();
429  TLOG(TLVL_TRACE) << "endOfData: Flushing " << initialStoreSize
430  << " stale events from the SharedMemoryEventManager." << TLOG_ENDL;
431  int counter = initialStoreSize;
432  while (active_buffers_.size() > 0 && counter > 0)
433  {
434  complete_buffer_(*active_buffers_.begin());
435  counter--;
436  }
437  TLOG(TLVL_TRACE) << "endOfData: Done flushing, there are now " << GetIncompleteEventCount()
438  << " stale events in the SharedMemoryEventManager." << TLOG_ENDL;
439 
440 
441  TLOG(TLVL_TRACE) << "Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) << " outstanding buffers..." << TLOG_ENDL;
442  auto start = std::chrono::steady_clock::now();
443  auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
444 
445  // We will wait until no buffer has been read for 1 second.
446  while (lastReadCount > 0 && TimeUtils::GetElapsedTime(start) < 1)
447  {
448  auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
449  if (temp != lastReadCount)
450  {
451  TLOG(TLVL_TRACE) << "Waiting for " << std::to_string(temp) << " outstanding buffers..." << TLOG_ENDL;
452  lastReadCount = temp;
453  start = std::chrono::steady_clock::now();
454  }
455  if (lastReadCount > 0) usleep(1000);
456  }
457 
458  TLOG(TLVL_TRACE) << "endOfData: Broadcasting EndOfData Fragment" << TLOG_ENDL;
459  FragmentPtr outFrag = std::move(Fragment::eodFrag(GetBufferCount()));
460  bool success = broadcastFragment_(std::move(outFrag), outFrag);
461  if (!success)
462  {
463  TLOG(TLVL_TRACE) << "endOfData: Clearing buffers to make room for EndOfData Fragment" << TLOG_ENDL;
464  for (size_t ii = 0; ii < size(); ++ii)
465  {
466  broadcasts_.MarkBufferEmpty(ii, true);
467  }
468  broadcastFragment_(std::move(outFrag), outFrag);
469  }
470 
471  TLOG(TLVL_DEBUG) << "Waiting for all art processes to exit, there are " << std::to_string(art_processes_.size()) << " remaining." << TLOG_ENDL;
472  while (art_processes_.size() > 0)
473  {
474  ShutdownArtProcesses(art_processes_);
475  }
476  ResetAttachedCount();
477 
478  TLOG(TLVL_TRACE) << "endOfData: Clearing buffers" << TLOG_ENDL;
479  for (size_t ii = 0; ii < size(); ++ii)
480  {
481  MarkBufferEmpty(ii, true);
482  }
483  released_incomplete_events_.clear();
484 
485  TLOG(TLVL_TRACE) << "endOfData END" << TLOG_ENDL;
486  TLOG(TLVL_INFO) << "EndOfData Complete. There were " << GetLastSeenBufferID() << " events processed in this run." << TLOG_ENDL;
487  return true;
488 }
489 
491 {
492  init_fragment_.reset(nullptr);
493  StartArt();
494  run_id_ = runID;
495  subrun_id_ = 1;
496  requests_.SendRoutingToken(queue_size_);
497  TLOG(TLVL_DEBUG) << "Starting run " << run_id_
498  << ", max queue size = "
499  << queue_size_
500  << ", queue size = "
501  << GetLockedBufferCount() << TLOG_ENDL;
502  if (metricMan)
503  {
504  double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
505  metricMan->sendMetric("Run Number", runSubrun, "Run:Subrun", 1, MetricMode::LastPoint);
506  }
507 }
508 
510 {
511  ++subrun_id_;
512  if (metricMan)
513  {
514  double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
515  metricMan->sendMetric("Run Number", runSubrun, "Run:Subrun", 1, MetricMode::LastPoint);
516  }
517 }
518 
520 {
521  FragmentPtr endOfRunFrag(new
522  Fragment(static_cast<size_t>
523  (ceil(sizeof(my_rank) /
524  static_cast<double>(sizeof(Fragment::value_type))))));
525 
526  endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
527  *endOfRunFrag->dataBegin() = my_rank;
528  broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
529 
530  return true;
531 }
532 
534 {
535  std::unique_ptr<artdaq::Fragment>
536  endOfSubrunFrag(new
537  Fragment(static_cast<size_t>
538  (ceil(sizeof(my_rank) /
539  static_cast<double>(sizeof(Fragment::value_type))))));
540 
541  endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
542  *endOfSubrunFrag->dataBegin() = my_rank;
543 
544  broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
545 
546  TLOG(TLVL_INFO) << "Subrun " << subrun_id_ << " in run " << run_id_ << " has ended. There were " << subrun_event_count_ << " events in this subrun." << TLOG_ENDL;
547  subrun_event_count_ = 0;
548 
549  return true;
550 }
551 
553 {
554  if (metricMan)
555  {
556  metricMan->sendMetric("Incomplete Event Count", GetIncompleteEventCount(), "events", 1, MetricMode::LastPoint);
557  metricMan->sendMetric("Pending Event Count", GetPendingEventCount(), "events", 1, MetricMode::LastPoint);
558  }
559 
560  if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
561  {
562  if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
563  return;
564 
565  last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
566  std::ostringstream oss;
567  oss << "Incomplete Events (" << num_fragments_per_event_ << "): ";
568  for (auto& ev : active_buffers_)
569  {
570  auto hdr = getEventHeader_(ev);
571  oss << hdr->sequence_id << " (" << GetFragmentCount(hdr->sequence_id) << "), ";
572  }
573  TLOG(TLVL_DEBUG) << oss.str() << TLOG_ENDL;
574  }
575 }
576 
577 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
578 {
579  auto buffer = broadcasts_.GetBufferForWriting(false);
580  auto start_time = std::chrono::steady_clock::now();
581  while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
582  {
583  usleep(10000);
584  buffer = broadcasts_.GetBufferForWriting(false);
585  }
586  if (buffer == -1)
587  {
588  TLOG(TLVL_ERROR) << "Broadcast of fragment type " << frag->typeString() << " failed due to timeout waiting for buffer!" << TLOG_ENDL;
589  outFrag.swap(frag);
590  return false;
591  }
592 
593  auto hdr = reinterpret_cast<detail::RawEventHeader*>(broadcasts_.GetBufferStart(buffer));
594  hdr->run_id = run_id_;
595  hdr->subrun_id = subrun_id_;
596  hdr->sequence_id = frag->sequenceID();
597  hdr->is_complete = true;
598  broadcasts_.IncrementWritePos(buffer, sizeof(detail::RawEventHeader));
599 
600  TLOG(TLVL_TRACE) << "broadcastFragment_ before Write calls" << TLOG_ENDL;
601  broadcasts_.Write(buffer, frag->headerAddress(), frag->size() * sizeof(RawDataType));
602 
603  broadcasts_.MarkBufferFull(buffer, -1);
604  outFrag.swap(frag);
605  return true;
606 }
607 
608 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(int buffer)
609 {
610  return reinterpret_cast<detail::RawEventHeader*>(GetBufferStart(buffer));
611 }
612 
613 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp)
614 {
615  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
616  TLOG(14) << "getBufferForSequenceID " << std::to_string(seqID) << " BEGIN" << TLOG_ENDL;
617  auto buffers = GetBuffersOwnedByManager();
618  for (auto& buf : buffers)
619  {
620  auto hdr = getEventHeader_(buf);
621  if (hdr->sequence_id == seqID)
622  {
623  TLOG(14) << "getBufferForSequenceID " << std::to_string(seqID) << " returning " << buf << TLOG_ENDL;
624  return buf;
625  }
626  }
627 
628 #if !ART_SUPPORTS_DUPLICATE_EVENTS
629  if (released_incomplete_events_.count(seqID)) {
630  TLOG(TLVL_ERROR) << "Buffer has already been marked \"Incomplete\" and sent to art!" << TLOG_ENDL;
631  return -2;
632  }
633 #endif
634 
635  if (!create_new) return -1;
636 
637  check_pending_buffers_(lk);
638  int new_buffer = GetBufferForWriting(false);
639 
640  if (new_buffer == -1)
641  {
642  new_buffer = GetBufferForWriting(overwrite_mode_);
643  }
644 
645  if (new_buffer == -1) return -1;
646  TraceLock(buffer_mutexes_[new_buffer], 34, "getBufferForSequenceID");
647  auto hdr = getEventHeader_(new_buffer);
648  hdr->is_complete = false;
649  hdr->run_id = run_id_;
650  hdr->subrun_id = subrun_id_;
651  hdr->sequence_id = seqID;
652  buffer_writes_pending_[new_buffer] = 0;
653  IncrementWritePos(new_buffer, sizeof(detail::RawEventHeader));
654 
655  active_buffers_.insert(new_buffer);
656 
657  if (timestamp != Fragment::InvalidTimestamp)
658  {
659  requests_.AddRequest(seqID, timestamp);
660  }
661  requests_.SendRequest();
662  TLOG(14) << "getBufferForSequenceID " << std::to_string(seqID) << " returning newly initialized buffer " << new_buffer << TLOG_ENDL;
663  return new_buffer;
664 }
665 
666 bool artdaq::SharedMemoryEventManager::hasFragments_(int buffer)
667 {
668  if (buffer == -1) return true;
669  if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
670  {
671  return true;
672  }
673  ResetReadPos(buffer);
674  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
675  return MoreDataInBuffer(buffer);
676 }
677 
678 void artdaq::SharedMemoryEventManager::complete_buffer_(int buffer)
679 {
680  auto hdr = getEventHeader_(buffer);
681  if (hdr->is_complete)
682  {
683  TLOG(TLVL_DEBUG) << "complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) << "." << TLOG_ENDL;
684 
685  requests_.RemoveRequest(hdr->sequence_id);
686  requests_.SendRoutingToken(1);
687  {
688  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
689  active_buffers_.erase(buffer);
690  pending_buffers_.insert(buffer);
691  }
692  }
693  check_pending_buffers_();
694 }
695 
696 bool artdaq::SharedMemoryEventManager::bufferComparator(int bufA, int bufB)
697 {
698  return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
699 }
700 
701 void artdaq::SharedMemoryEventManager::check_pending_buffers_(std::unique_lock<std::mutex> const& lock)
702 {
703  TLOG(TLVL_TRACE) << "check_pending_buffers_ BEGIN Locked=" << std::boolalpha << lock.owns_lock() << TLOG_ENDL;
704 
705  auto buffers = GetBuffersOwnedByManager();
706  for (auto buf : buffers)
707  {
708  if (ResetBuffer(buf) && !pending_buffers_.count(buf))
709  {
710  auto hdr = getEventHeader_(buf);
711  if (active_buffers_.count(buf))
712  {
713  TLOG(TLVL_WARNING) << "Active event " << std::to_string(hdr->sequence_id) << " is stale. Scheduling release of incomplete event to art." << TLOG_ENDL;
714  requests_.RemoveRequest(hdr->sequence_id);
715  requests_.SendRoutingToken(1);
716  active_buffers_.erase(buf);
717  pending_buffers_.insert(buf);
718  if (!released_incomplete_events_.count(hdr->sequence_id)) {
719  released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
720  }
721  else {
722  released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
723  }
724  }
725 
726  }
727  }
728 
729  Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
730 
731  // Only use "weak ordering" when buffers are available for writing
732  if (WriteReadyCount(false) != 0)
733  {
734  for (auto buf : active_buffers_)
735  {
736  auto hdr = getEventHeader_(buf);
737  TLOG(TLVL_TRACE) << "Buffer: " << buf << ", SeqID: " << std::to_string(hdr->sequence_id) << ", ACTIVE" << TLOG_ENDL;
738  if (hdr->sequence_id < lowestSeqId)
739  {
740  lowestSeqId = hdr->sequence_id;
741  }
742  }
743  TLOG(TLVL_TRACE) << "Lowest SeqID held: " << std::to_string(lowestSeqId) << TLOG_ENDL;
744  }
745 
746  std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
747  sorted_buffers.sort([this](int a, int b) {return bufferComparator(a, b); });
748  for (auto buf : sorted_buffers)
749  {
750  auto hdr = getEventHeader_(buf);
751  if (hdr->sequence_id > lowestSeqId) break;
752  TLOG(TLVL_DEBUG) << "Releasing event " << std::to_string(hdr->sequence_id) << " in buffer " << buf << " to art." << TLOG_ENDL;
753  MarkBufferFull(buf);
754  subrun_event_count_++;
755  pending_buffers_.erase(buf);
756  }
757 
758  TLOG(TLVL_TRACE) << "check_pending_buffers_: Sending Metrics" << TLOG_ENDL;
759  if (metricMan)
760  {
761  auto full = ReadReadyCount();
762  auto empty = WriteReadyCount(overwrite_mode_);
763  auto total = size();
764  metricMan->sendMetric("Shared Memory Full Buffers", full, "buffers", 2, MetricMode::LastPoint);
765  metricMan->sendMetric("Shared Memory Available Buffers", empty, "buffers", 2, MetricMode::LastPoint);
766  metricMan->sendMetric("Shared Memory Full %", full * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
767  metricMan->sendMetric("Shared Memory Available %", empty * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
768  }
769  TLOG(TLVL_TRACE) << "check_pending_buffers_ END" << TLOG_ENDL;
770 }
771 
772 void artdaq::SharedMemoryEventManager::send_init_frag_()
773 {
774  if (init_fragment_ != nullptr)
775  {
776  TLOG(TLVL_TRACE) << "Sending init Fragment to art..." << TLOG_ENDL;
777 
778 #if 0
779  std::string fileName = "receiveInitMessage_" + std::to_string(my_rank) + ".bin";
780  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
781  ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
782  ostream.close();
783 #endif
784 
785  broadcastFragment_(std::move(init_fragment_), init_fragment_);
786  TLOG(TLVL_TRACE) << "Init Fragment sent" << TLOG_ENDL;
787  }
788  else if (send_init_fragments_)
789  {
790  TLOG(TLVL_WARNING) << "Cannot send init fragment because I haven't yet received one!" << TLOG_ENDL;
791  }
792 }
793 
795 {
796  if (!init_fragment_ || init_fragment_ == nullptr)
797  {
798  init_fragment_.swap(frag);
799  send_init_frag_();
800  }
801 }
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void StartArt()
Start all the art processes.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not yet been set already.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
bool endOfData()
Indicate that the end of input has been reached to the art processes.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.