artdaq  v3_00_01
SharedMemoryEventManager.cc
1 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
2 #include "artdaq-core/Core/StatisticsCollection.hh"
3 #include "artdaq-core/Utilities/TraceLock.hh"
4 #include <sys/wait.h>
5 #include "SharedMemoryEventManager.hh"
6 
7 artdaq::SharedMemoryEventManager::SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
8  : SharedMemoryManager(pset.get<uint32_t>("shared_memory_key", 0xBEE70000 + getpid()),
9  pset.get<size_t>("buffer_count"),
10  pset.has_key("max_event_size_bytes") ? pset.get<size_t>("max_event_size_bytes") : pset.get<size_t>("expected_fragments_per_event") * pset.get<size_t>("max_fragment_size_bytes"),
11  pset.get<size_t>("stale_buffer_timeout_usec", pset.get<size_t>("event_queue_wait_time", 5) * 1000000),
12  !pset.get<bool>("broadcast_mode", false))
13  , num_art_processes_(pset.get<size_t>("art_analyzer_count", 1))
14  , num_fragments_per_event_(pset.get<size_t>("expected_fragments_per_event"))
15  , queue_size_(pset.get<size_t>("buffer_count"))
16  , run_id_(0)
17  , subrun_id_(0)
18  , update_run_ids_(pset.get<bool>("update_run_ids_on_new_fragment", true))
19  , overwrite_mode_(!pset.get<bool>("use_art", true) || pset.get<bool>("overwrite_mode", false) || pset.get<bool>("broadcast_mode", false))
20  , send_init_fragments_(pset.get<bool>("send_init_fragments", true))
21  , buffer_writes_pending_()
22  , incomplete_event_report_interval_ms_(pset.get<int>("incomplete_event_report_interval_ms", -1))
23  , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
24  , broadcast_timeout_ms_(pset.get<int>("fragment_broadcast_timeout_ms", 3000))
25  , broadcast_count_(0)
26  , subrun_event_count_(0)
27  , art_processes_()
28  , restart_art_(false)
29  , current_art_pset_(art_pset)
30  , requests_(pset)
31  , broadcasts_(pset.get<uint32_t>("broadcast_shared_memory_key", 0xCEE70000 + getpid()),
32  pset.get<size_t>("broadcast_buffer_count", 10),
33  pset.get<size_t>("broadcast_buffer_size", 0x100000),
34  pset.get<int>("fragment_broadcast_timeout_ms", 3000) * 1000, false)
35 {
36  SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
37  broadcasts_.SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
38 
39  if (pset.get<bool>("use_art", true) == false) {
40  TLOG_INFO("SharedMemoryEventManager") << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false" << TLOG_ENDL;
41  num_art_processes_ = 0;
42  }
43  else {
44  TLOG_INFO("SharedMemoryEventManager") << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true" << TLOG_ENDL;
45  TLOG_TRACE("SharedMemoryEventManager") << "art_pset is " << art_pset.to_string() << TLOG_ENDL;
46  }
47  current_art_config_file_ = std::make_shared<art_config_file>(art_pset/*, GetKey(), GetBroadcastKey()*/);
48 
49  if (overwrite_mode_ && num_art_processes_ > 0)
50  {
51  TLOG_WARNING("SharedMemoryEventManager") << "Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!" << TLOG_ENDL;
52  }
53  else if (overwrite_mode_)
54  {
55  TLOG_INFO("SharedMemoryEventManager") << "Overwrite Mode enabled, no configured art processes at startup" << TLOG_ENDL;
56  }
57 
58  for (size_t ii = 0; ii < size(); ++ii)
59  {
60  buffer_writes_pending_[ii] = 0;
61  }
62 
63  if (!IsValid()) throw cet::exception("SharedMemoryEventManager") << "Unable to attach to Shared Memory!";
64 
65  TLOG_TRACE("SharedMemoryEventManager") << "Setting Writer rank to " << my_rank << TLOG_ENDL;
66  SetRank(my_rank);
67  TLOG_DEBUG("SharedMemoryEventManager") << "Writer Rank is " << GetRank() << TLOG_ENDL;
68 
69 
70  TLOG_TRACE("SharedMemoryEventManager") << "END CONSTRUCTOR" << TLOG_ENDL;
71 }
72 
74 {
75  TLOG_TRACE("SharedMemoryEventManager") << "DESTRUCTOR" << TLOG_ENDL;
76  endOfData();
77  TLOG_TRACE("SharedMemoryEventManager") << "Destructor END" << TLOG_ENDL;
78 }
79 
80 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag, void* dataPtr)
81 {
82  TLOG_TRACE("SharedMemoryEventManager") << "AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
83  << ", sequence_id=" << std::to_string(frag.sequence_id) << TLOG_ENDL;
84  auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
85  TLOG_TRACE("SharedMemoryEventManager") << "Using buffer " << std::to_string(buffer) << TLOG_ENDL;
86  if (buffer == -1) return false;
87  if (buffer == -2)
88  {
89  TLOG_ERROR("SharedMemoryEventManager") << "Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
90  return true;
91  }
92 
93  auto hdr = getEventHeader_(buffer);
94  if (update_run_ids_)
95  {
96  hdr->run_id = run_id_;
97  hdr->subrun_id = subrun_id_;
98  }
99 
100  TLOG_TRACE("SharedMemoryEventManager") << "AddFragment before Write calls" << TLOG_ENDL;
101  Write(buffer, dataPtr, frag.word_count * sizeof(RawDataType));
102 
103  TLOG_TRACE("SharedMemoryEventManager") << "Checking for complete event" << TLOG_ENDL;
104  auto fragmentCount = GetFragmentCount(frag.sequence_id);
105  hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
106  TLOG_TRACE("SharedMemoryEventManager") << "hdr->is_complete=" << std::boolalpha << hdr->is_complete
107  << ", fragmentCount=" << std::to_string(fragmentCount)
108  << ", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
109  << ", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]) << TLOG_ENDL;
110 
111  complete_buffer_(buffer);
112  requests_.SendRequest(true);
113 
114  TLOG_TRACE("SharedMemoryEventManager") << "AddFragment END" << TLOG_ENDL;
115  return true;
116 }
117 
118 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag)
119 {
120  TLOG_TRACE("SharedMemoryEventManager") << "AddFragment(FragmentPtr) BEGIN" << TLOG_ENDL;
121  auto hdr = *reinterpret_cast<detail::RawFragmentHeader*>(frag->headerAddress());
122  auto data = frag->headerAddress();
123  auto start = std::chrono::steady_clock::now();
124  bool sts = false;
125  while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
126  {
127  sts = AddFragment(hdr, data);
128  if (!sts) usleep(1000);
129  }
130  if (!sts)
131  {
132  outfrag = std::move(frag);
133  }
134  TLOG_TRACE("SharedMemoryEventManager") << "AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts << TLOG_ENDL;
135  return sts;
136 }
137 
138 artdaq::RawDataType* artdaq::SharedMemoryEventManager::WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable)
139 {
140  TLOG_ARB(14, "SharedMemoryEventManager") << "WriteFragmentHeader BEGIN" << TLOG_ENDL;
141  auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
142 
143  if (buffer < 0)
144  {
145  if (buffer == -1 && !dropIfNoBuffersAvailable) return nullptr;
146  if (buffer == -2)
147  {
148  TLOG_ERROR("SharedMemoryEventManager") << "Dropping fragment because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
149  }
150  else
151  {
152  TLOG_ERROR("SharedMemoryEventManager") << "Dropping fragment because there is no room in the queue and reliable mode is off: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
153  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
154  }
155  dropped_data_.reset(new Fragment(frag.word_count - frag.num_words()));
156  return dropped_data_->dataBegin();
157  }
158 
159  buffer_writes_pending_[buffer]++;
160  TraceLock lk(buffer_mutexes_[buffer], 50, "WriteFragmentHeader");
161  Write(buffer, &frag, frag.num_words() * sizeof(RawDataType));
162 
163  auto pos = reinterpret_cast<RawDataType*>(GetWritePos(buffer));
164  if (frag.word_count - frag.num_words() > 0) {
165  IncrementWritePos(buffer, (frag.word_count - frag.num_words()) * sizeof(RawDataType));
166  }
167 
168  TLOG_ARB(14, "SharedMemoryEventManager") << "WriteFragmentHeader END" << TLOG_ENDL;
169  return pos;
170 
171 }
172 
173 void artdaq::SharedMemoryEventManager::DoneWritingFragment(detail::RawFragmentHeader frag)
174 {
175  TLOG_TRACE("SharedMemoryEventManager") << "DoneWritingFragment BEGIN" << TLOG_ENDL;
176  auto buffer = getBufferForSequenceID_(frag.sequence_id, false, frag.timestamp);
177  if (buffer == -1) Detach(true, "SharedMemoryEventManager", "getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
178  if (buffer == -2) return;
179 
180  auto hdr = getEventHeader_(buffer);
181  if (update_run_ids_)
182  {
183  hdr->run_id = run_id_;
184  hdr->subrun_id = subrun_id_;
185  }
186 
187  buffer_writes_pending_[buffer]--;
188  if (buffer_writes_pending_[buffer] != 0)
189  {
190  TLOG_TRACE("SharedMemoryEventManager") << "Done writing fragment, but there's another writer. Not doing bookkeeping steps." << TLOG_ENDL;
191  return;
192  }
193  auto frag_count = GetFragmentCount(frag.sequence_id);
194  hdr->is_complete = frag_count == num_fragments_per_event_;
195 #if ART_SUPPORTS_DUPLICATE_EVENTS
196  if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
197  hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
198  }
199 #endif
200 
201  complete_buffer_(buffer);
202  requests_.SendRequest(true);
203  TLOG_TRACE("SharedMemoryEventManager") << "DoneWritingFragment END" << TLOG_ENDL;
204 }
205 
206 size_t artdaq::SharedMemoryEventManager::GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type)
207 {
208  return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID, false), type);
209 }
210 
211 size_t artdaq::SharedMemoryEventManager::GetFragmentCountInBuffer(int buffer, Fragment::type_t type)
212 {
213  if (buffer == -1) return 0;
214  ResetReadPos(buffer);
215  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
216 
217  size_t count = 0;
218 
219  while (MoreDataInBuffer(buffer))
220  {
221  auto fragHdr = reinterpret_cast<artdaq::detail::RawFragmentHeader*>(GetReadPos(buffer));
222  IncrementReadPos(buffer, fragHdr->word_count * sizeof(RawDataType));
223  if (type != Fragment::InvalidFragmentType && fragHdr->type != type) continue;
224  TLOG_TRACE("GetFragmentCount") << "Adding Fragment with size=" << std::to_string(fragHdr->word_count) << " to Fragment count" << TLOG_ENDL;
225  ++count;
226  }
227 
228  return count;
229 }
230 
231 void artdaq::SharedMemoryEventManager::RunArt(std::shared_ptr<art_config_file> config_file, pid_t& pid_out)
232 {
233  while (restart_art_)
234  {
235  send_init_frag_();
236  TLOG_INFO("SharedMemoryEventManager") << "Starting art process with config file " << config_file->getFileName() << TLOG_ENDL;
237  std::vector<char*> args{ (char*)"art", (char*)"-c", &config_file->getFileName()[0], NULL };
238 
239  auto pid = fork();
240  if (pid == 0)
241  { /* child */
242  execvp("art", &args[0]);
243  exit(1);
244  }
245  pid_out = pid;
246 
247  TLOG_INFO("SharedMemoryEventManager") << "PID of new art process is " << pid << TLOG_ENDL;
248  art_processes_.insert(pid);
249  int status;
250  waitpid(pid, &status, 0);
251  TLOG_INFO("SharedMemoryEventManager") << "Removing PID " << pid << " from process list" << TLOG_ENDL;
252  art_processes_.erase(pid);
253  if (status == 0)
254  {
255  TLOG_INFO("SharedMemoryEventManager") << "art process " << pid << " exited normally, " << (restart_art_ ? "restarting" : "not restarting") << TLOG_ENDL;
256  }
257  else
258  {
259  TLOG_WARNING("SharedMemoryEventManager") << "art process " << pid << " exited with status code 0x" << std::hex << status << " (" << std::dec << status << "), " << (restart_art_ ? "restarting" : "not restarting") << TLOG_ENDL;
260  }
261  }
262 }
263 
265 {
266  restart_art_ = true;
267  if (num_art_processes_ == 0) return;
268  for (size_t ii = 0; ii < num_art_processes_; ++ii)
269  {
270  StartArtProcess(current_art_pset_);
271  }
272 }
273 
275 {
276  static std::mutex start_art_mutex;
277  TraceLock lk(start_art_mutex, 15, "StartArtLock");
278  restart_art_ = true;
279  auto initialCount = GetAttachedCount();
280  auto startTime = std::chrono::steady_clock::now();
281 
282  if (pset != current_art_pset_)
283  {
284  current_art_pset_ = pset;
285  current_art_config_file_ = std::make_shared<art_config_file>(pset/*, GetKey(), GetBroadcastKey()*/);
286  }
287  pid_t pid = -1;
288  boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
289  thread.detach();
290 
291 
292  while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
293  && TimeUtils::GetElapsedTime(startTime) < 5)
294  {
295  usleep(1000);
296  }
297  if (GetAttachedCount() - initialCount < 1 || pid <= 0)
298  {
299  TLOG_WARNING("SharedMemoryEventManager") << "art process has not started after 5s. Check art configuration!"
300  << " (pid=" << pid << ", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) << ")" << TLOG_ENDL;
301  return 0;
302  }
303  else
304  {
305  TLOG_INFO("SharedMemoryEventManager") << std::setw(4) << std::fixed << "art initialization took "
306  << TimeUtils::GetElapsedTime(startTime) << " seconds." << TLOG_ENDL;
307 
308  return pid;
309  }
310 
311 }
312 
314 {
315  restart_art_ = false;
316  current_art_config_file_ = nullptr;
317  current_art_pset_ = fhicl::ParameterSet();
318 
319  for (auto pid : pids)
320  {
321  if (kill(pid, 0) >= 0)
322  {
323  pids.erase(pid);
324  }
325  }
326  if (pids.size() == 0)
327  {
328  TLOG_ARB(14, "SharedMemoryEventManager") << "All art processes already exited, nothing to do." << TLOG_ENDL;
329  usleep(1000);
330  return;
331  }
332 
333  TLOG_TRACE("SharedMemoryEventManager") << "Gently informing art processes that it is time to shut down" << TLOG_ENDL;
334  for (auto pid : pids)
335  {
336  kill(pid, SIGQUIT);
337  }
338 
339  int graceful_wait_ms = 1000;
340  int int_wait_ms = 100;
341 
342  TLOG_TRACE("SharedMemoryEventManager") << "Waiting up to " << graceful_wait_ms << " ms for all art processes to exit gracefully" << TLOG_ENDL;
343  for (int ii = 0; ii < graceful_wait_ms; ++ii)
344  {
345  usleep(1000);
346 
347  for (auto pid : pids)
348  {
349  if (kill(pid, 0) < 0)
350  {
351  pids.erase(pid);
352  }
353  }
354  if (pids.size() == 0)
355  {
356  TLOG_TRACE("SharedMemoryEventManager") << "All art processes exited after " << ii << " ms." << TLOG_ENDL;
357  return;
358  }
359  }
360 
361  TLOG_TRACE("SharedMemoryEventManager") << "Insisting that the art processes shut down" << TLOG_ENDL;
362  for (auto pid : pids)
363  {
364  kill(pid, SIGINT);
365  }
366 
367  TLOG_TRACE("SharedMemoryEventManager") << "Waiting up to " << int_wait_ms << " ms for all art processes to exit" << TLOG_ENDL;
368  for (int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
369  {
370  usleep(1000);
371 
372  for (auto pid : pids)
373  {
374  if (kill(pid, 0) < 0)
375  {
376  pids.erase(pid);
377  }
378  }
379 
380  if (pids.size() == 0)
381  {
382  TLOG_TRACE("SharedMemoryEventManager") << "All art processes exited after " << ii << " ms." << TLOG_ENDL;
383  return;
384  }
385  }
386 
387  TLOG_TRACE("SharedMemoryEventManager") << "Killing remaning art processes with extreme prejudice" << TLOG_ENDL;
388  while (pids.size() > 0)
389  {
390  kill(*pids.begin(), SIGKILL);
391  }
392 }
393 
394 void artdaq::SharedMemoryEventManager::ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun, int n_art_processes)
395 {
396  TLOG_DEBUG("SharedMemoryEventManager") << "ReconfigureArt BEGIN" << TLOG_ENDL;
397  if (restart_art_) // Art is running
398  {
399  endOfData();
400  }
401  for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
402  {
403  broadcasts_.MarkBufferEmpty(ii, true);
404  }
405  if (newRun == 0) newRun = run_id_ + 1;
406  current_art_pset_ = art_pset;
407  current_art_config_file_ = std::make_shared<art_config_file>(art_pset/*, GetKey(), GetBroadcastKey()*/);
408 
409  if (n_art_processes != -1)
410  {
411  TLOG_INFO("SharedMemoryEventManager") << "Setting number of art processes to " << n_art_processes << TLOG_ENDL;
412  num_art_processes_ = n_art_processes;
413  }
414  startRun(newRun);
415  TLOG_DEBUG("SharedMemoryEventManager") << "ReconfigureArt END" << TLOG_ENDL;
416 }
417 
419 {
420  init_fragment_.reset(nullptr);
421  TLOG_TRACE("SharedMemoryEventManager") << "SharedMemoryEventManager::endOfData" << TLOG_ENDL;
422  restart_art_ = false;
423 
424  size_t initialStoreSize = GetIncompleteEventCount();
425  TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Flushing " << initialStoreSize
426  << " stale events from the SharedMemoryEventManager." << TLOG_ENDL;
427  int counter = initialStoreSize;
428  while (active_buffers_.size() > 0 && counter > 0)
429  {
430  complete_buffer_(*active_buffers_.begin());
431  counter--;
432  }
433  TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Done flushing, there are now " << GetIncompleteEventCount()
434  << " stale events in the SharedMemoryEventManager." << TLOG_ENDL;
435 
436 
437  TLOG_TRACE("SharedMemoryEventManager") << "Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) << " outstanding buffers..." << TLOG_ENDL;
438  auto start = std::chrono::steady_clock::now();
439  auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
440 
441  // We will wait until no buffer has been read for 1 second.
442  while (lastReadCount > 0 && TimeUtils::GetElapsedTime(start) < 1)
443  {
444  auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
445  if (temp != lastReadCount)
446  {
447  TLOG_TRACE("SharedMemoryEventManager") << "Waiting for " << std::to_string(temp) << " outstanding buffers..." << TLOG_ENDL;
448  lastReadCount = temp;
449  start = std::chrono::steady_clock::now();
450  }
451  if (lastReadCount > 0) usleep(1000);
452  }
453 
454  TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Broadcasting EndOfData Fragment" << TLOG_ENDL;
455  FragmentPtr outFrag = std::move(Fragment::eodFrag(GetBufferCount()));
456  bool success = broadcastFragment_(std::move(outFrag), outFrag);
457  if (!success)
458  {
459  TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Clearing buffers to make room for EndOfData Fragment" << TLOG_ENDL;
460  for (size_t ii = 0; ii < size(); ++ii)
461  {
462  broadcasts_.MarkBufferEmpty(ii, true);
463  }
464  broadcastFragment_(std::move(outFrag), outFrag);
465  }
466 
467  while (art_processes_.size() > 0)
468  {
469  TLOG_DEBUG("SharedMemoryEventManager") << "Waiting for all art processes to exit, there are " << std::to_string(art_processes_.size()) << " remaining." << TLOG_ENDL;
470  ShutdownArtProcesses(art_processes_);
471  }
472  ResetAttachedCount();
473 
474  TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Clearing buffers" << TLOG_ENDL;
475  for (size_t ii = 0; ii < size(); ++ii)
476  {
477  MarkBufferEmpty(ii, true);
478  }
479  released_incomplete_events_.clear();
480 
481  TLOG_TRACE("SharedMemoryEventManager") << "endOfData END" << TLOG_ENDL;
482  TLOG_INFO("SharedMemoryEventManager") << "EndOfData Complete. There were " << GetLastSeenBufferID() << " events processed in this run." << TLOG_ENDL;
483  return true;
484 }
485 
487 {
488  init_fragment_.reset(nullptr);
489  StartArt();
490  run_id_ = runID;
491  subrun_id_ = 1;
492  requests_.SendRoutingToken(queue_size_);
493  TLOG_DEBUG("SharedMemoryEventManager") << "Starting run " << run_id_
494  << ", max queue size = "
495  << queue_size_
496  << ", queue size = "
497  << GetLockedBufferCount() << TLOG_ENDL;
498  if (metricMan)
499  {
500  double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
501  metricMan->sendMetric("Run Number", runSubrun, "Run:Subrun", 1, MetricMode::LastPoint);
502  }
503 }
504 
506 {
507  ++subrun_id_;
508  if (metricMan)
509  {
510  double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
511  metricMan->sendMetric("Run Number", runSubrun, "Run:Subrun", 1, MetricMode::LastPoint);
512  }
513 }
514 
516 {
517  FragmentPtr endOfRunFrag(new
518  Fragment(static_cast<size_t>
519  (ceil(sizeof(my_rank) /
520  static_cast<double>(sizeof(Fragment::value_type))))));
521 
522  endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
523  *endOfRunFrag->dataBegin() = my_rank;
524  broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
525 
526  return true;
527 }
528 
530 {
531  std::unique_ptr<artdaq::Fragment>
532  endOfSubrunFrag(new
533  Fragment(static_cast<size_t>
534  (ceil(sizeof(my_rank) /
535  static_cast<double>(sizeof(Fragment::value_type))))));
536 
537  endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
538  *endOfSubrunFrag->dataBegin() = my_rank;
539 
540  broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
541 
542  TLOG_INFO("SharedMemoryEventManager") << "Subrun " << subrun_id_ << " in run " << run_id_ << " has ended. There were " << subrun_event_count_ << " events in this subrun." << TLOG_ENDL;
543  subrun_event_count_ = 0;
544 
545  return true;
546 }
547 
549 {
550  if (metricMan)
551  {
552  metricMan->sendMetric("Incomplete Event Count", GetIncompleteEventCount(), "events", 1, MetricMode::LastPoint);
553  metricMan->sendMetric("Pending Event Count", GetPendingEventCount(), "events", 1, MetricMode::LastPoint);
554  }
555  check_pending_buffers_();
556  if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
557  {
558  if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
559  return;
560 
561  last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
562  std::ostringstream oss;
563  oss << "Incomplete Events (" << num_fragments_per_event_ << "): ";
564  for (auto& ev : active_buffers_)
565  {
566  auto hdr = getEventHeader_(ev);
567  oss << hdr->sequence_id << " (" << GetFragmentCount(hdr->sequence_id) << "), ";
568  }
569  TLOG_DEBUG("SharedMemoryEventManager") << oss.str() << TLOG_ENDL;
570  }
571 }
572 
573 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
574 {
575  auto buffer = broadcasts_.GetBufferForWriting(false);
576  auto start_time = std::chrono::steady_clock::now();
577  while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
578  {
579  usleep(10000);
580  buffer = broadcasts_.GetBufferForWriting(false);
581  }
582  if (buffer == -1)
583  {
584  TLOG_ERROR("SharedMemoryEventManager") << "Broadcast of fragment type " << frag->typeString() << " failed due to timeout waiting for buffer!" << TLOG_ENDL;
585  outFrag.swap(frag);
586  return false;
587  }
588 
589  auto hdr = reinterpret_cast<detail::RawEventHeader*>(broadcasts_.GetBufferStart(buffer));
590  hdr->run_id = run_id_;
591  hdr->subrun_id = subrun_id_;
592  hdr->sequence_id = frag->sequenceID();
593  hdr->is_complete = true;
594  broadcasts_.IncrementWritePos(buffer, sizeof(detail::RawEventHeader));
595 
596  TLOG_TRACE("SharedMemoryEventManager") << "broadcastFragment_ before Write calls" << TLOG_ENDL;
597  broadcasts_.Write(buffer, frag->headerAddress(), frag->size() * sizeof(RawDataType));
598 
599  broadcasts_.MarkBufferFull(buffer, -1);
600  outFrag.swap(frag);
601  return true;
602 }
603 
604 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(int buffer)
605 {
606  return reinterpret_cast<detail::RawEventHeader*>(GetBufferStart(buffer));
607 }
608 
609 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp)
610 {
611  check_pending_buffers_();
612  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
613  TLOG_ARB(14, "SharedMemoryEventManager") << "getBufferForSequenceID " << std::to_string(seqID) << " BEGIN" << TLOG_ENDL;
614  auto buffers = GetBuffersOwnedByManager();
615  for (auto& buf : buffers)
616  {
617  auto hdr = getEventHeader_(buf);
618  if (hdr->sequence_id == seqID)
619  {
620  TLOG_ARB(14, "SharedMemoryEventManager") << "getBufferForSequenceID " << std::to_string(seqID) << " returning " << buf << TLOG_ENDL;
621  return buf;
622  }
623  }
624 
625 #if !ART_SUPPORTS_DUPLICATE_EVENTS
626  if (released_incomplete_events_.count(seqID)) {
627  TLOG_ERROR("SharedMemoryEventManager") << "Buffer has already been marked \"Incomplete\" and sent to art!" << TLOG_ENDL;
628  return -2;
629  }
630 #endif
631 
632  if (!create_new) return -1;
633 
634  int new_buffer = GetBufferForWriting(false);
635 
636  if (new_buffer == -1)
637  {
638  new_buffer = GetBufferForWriting(overwrite_mode_);
639  }
640 
641  if (new_buffer == -1) return -1;
642  TraceLock(buffer_mutexes_[new_buffer], 34, "getBufferForSequenceID");
643  auto hdr = getEventHeader_(new_buffer);
644  hdr->is_complete = false;
645  hdr->run_id = run_id_;
646  hdr->subrun_id = subrun_id_;
647  hdr->sequence_id = seqID;
648  buffer_writes_pending_[new_buffer] = 0;
649  IncrementWritePos(new_buffer, sizeof(detail::RawEventHeader));
650 
651  active_buffers_.insert(new_buffer);
652 
653  if (timestamp != Fragment::InvalidTimestamp)
654  {
655  requests_.AddRequest(seqID, timestamp);
656  }
657  requests_.SendRequest();
658  TLOG_ARB(14, "SharedMemoryEventManager") << "getBufferForSequenceID " << std::to_string(seqID) << " returning newly initialized buffer " << new_buffer << TLOG_ENDL;
659  return new_buffer;
660 }
661 
662 bool artdaq::SharedMemoryEventManager::hasFragments_(int buffer)
663 {
664  if (buffer == -1) return true;
665  if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
666  {
667  return true;
668  }
669  ResetReadPos(buffer);
670  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
671  return MoreDataInBuffer(buffer);
672 }
673 
674 void artdaq::SharedMemoryEventManager::complete_buffer_(int buffer)
675 {
676  auto hdr = getEventHeader_(buffer);
677  if (hdr->is_complete)
678  {
679  TLOG_DEBUG("SharedMemoryEventManager") << "complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) << "." << TLOG_ENDL;
680 
681  requests_.RemoveRequest(hdr->sequence_id);
682  requests_.SendRoutingToken(1);
683  {
684  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
685  active_buffers_.erase(buffer);
686  pending_buffers_.insert(buffer);
687  }
688  }
689  check_pending_buffers_();
690 }
691 
692 bool artdaq::SharedMemoryEventManager::bufferComparator(int bufA, int bufB)
693 {
694  return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
695 }
696 
697 void artdaq::SharedMemoryEventManager::check_pending_buffers_()
698 {
699  TLOG_TRACE("SharedMemoryEventManager") << "check_pending_buffers_ BEGIN" << TLOG_ENDL;
700  {
701  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
702  auto buffers = GetBuffersOwnedByManager();
703  for (auto buf : buffers)
704  {
705  if (ResetBuffer(buf) && !pending_buffers_.count(buf))
706  {
707  auto hdr = getEventHeader_(buf);
708  if (active_buffers_.count(buf))
709  {
710  TLOG_WARNING("SharedMemoryEventManager") << "Active event " << std::to_string(hdr->sequence_id) << " is stale. Scheduling release of incomplete event to art." << TLOG_ENDL;
711  requests_.RemoveRequest(hdr->sequence_id);
712  requests_.SendRoutingToken(1);
713  active_buffers_.erase(buf);
714  pending_buffers_.insert(buf);
715  if (!released_incomplete_events_.count(hdr->sequence_id)) {
716  released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
717  }
718  else {
719  released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
720  }
721  }
722 
723  }
724  }
725 
726  Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
727 
728  // Only use "weak ordering" when buffers are available for writing
729  if (WriteReadyCount(false) != 0)
730  {
731  for (auto buf : active_buffers_)
732  {
733  auto hdr = getEventHeader_(buf);
734  TLOG_TRACE("SharedMemoryEventManager") << "Buffer: " << buf << ", SeqID: " << std::to_string(hdr->sequence_id) << ", ACTIVE" << TLOG_ENDL;
735  if (hdr->sequence_id < lowestSeqId)
736  {
737  lowestSeqId = hdr->sequence_id;
738  }
739  }
740  TLOG_TRACE("SharedMemoryEventManager") << "Lowest SeqID held: " << std::to_string(lowestSeqId) << TLOG_ENDL;
741  }
742 
743  std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
744  sorted_buffers.sort([this](int a, int b) {return bufferComparator(a, b); });
745  for (auto buf : sorted_buffers)
746  {
747  auto hdr = getEventHeader_(buf);
748  if (hdr->sequence_id > lowestSeqId) break;
749  TLOG_DEBUG("SharedMemoryEventManager") << "Releasing event " << std::to_string(hdr->sequence_id) << " in buffer " << buf << " to art." << TLOG_ENDL;
750  MarkBufferFull(buf);
751  subrun_event_count_++;
752  pending_buffers_.erase(buf);
753  }
754  }
755 
756  TLOG_TRACE("SharedMemoryEventManager") << "check_pending_buffers_: Sending Metrics" << TLOG_ENDL;
757  if (metricMan)
758  {
759  auto full = ReadReadyCount();
760  auto empty = WriteReadyCount(overwrite_mode_);
761  auto total = size();
762  metricMan->sendMetric("Shared Memory Full Buffers", full, "buffers", 2, MetricMode::LastPoint);
763  metricMan->sendMetric("Shared Memory Available Buffers", empty, "buffers", 2, MetricMode::LastPoint);
764  metricMan->sendMetric("Shared Memory Full %", full * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
765  metricMan->sendMetric("Shared Memory Available %", empty * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
766  }
767  TLOG_TRACE("SharedMemoryEventManager") << "check_pending_buffers_ END" << TLOG_ENDL;
768 }
769 
770 void artdaq::SharedMemoryEventManager::send_init_frag_()
771 {
772  if (init_fragment_ != nullptr)
773  {
774  TLOG_TRACE("SharedMemoryEventManager") << "Sending init Fragment to art..." << TLOG_ENDL;
775 
776 #if 0
777  std::string fileName = "receiveInitMessage_" + std::to_string(my_rank) + ".bin";
778  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
779  ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
780  ostream.close();
781 #endif
782 
783  broadcastFragment_(std::move(init_fragment_), init_fragment_);
784  TLOG_TRACE("SharedMemoryEventManager") << "Init Fragment sent" << TLOG_ENDL;
785  }
786  else if (send_init_fragments_)
787  {
788  TLOG_WARNING("SharedMemoryEventManager") << "Cannot send init fragment because I haven't yet received one!" << TLOG_ENDL;
789  }
790 }
791 
793 {
794  if (!init_fragment_ || init_fragment_ == nullptr)
795  {
796  init_fragment_.swap(frag);
797  send_init_frag_();
798  }
799 }
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void StartArt()
Start all the art processes.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not yet been set already.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
bool endOfData()
Indicate that the end of input has been reached to the art processes.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.