artdaq  v3_12_02
SharedMemoryEventManager.cc
1 
2 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
3 #include <sys/wait.h>
4 
5 #include <memory>
6 #include <numeric>
7 
8 #include "artdaq-core/Core/StatisticsCollection.hh"
9 #include "artdaq-core/Utilities/TraceLock.hh"
10 
11 #define TRACE_NAME (app_name + "_SharedMemoryEventManager").c_str()
12 
13 #define TLVL_BUFFER 40
14 #define TLVL_BUFLCK 41
15 
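// Build a shared memory key from a seed value (high byte, e.g. 0xEE or 0xBB), the partition
// number plus one (shifted into the upper half), and the low 16 bits of the PID, so that keys
// differ across partitions and processes.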
16 #define build_key(seed) ((seed) + ((GetPartitionNumber() + 1) << 16) + (getpid() & 0xFFFF))
17 
18 std::mutex artdaq::SharedMemoryEventManager::sequence_id_mutex_;
19 std::mutex artdaq::SharedMemoryEventManager::subrun_event_map_mutex_;
20 const std::string artdaq::SharedMemoryEventManager::
21  FRAGMENTS_RECEIVED_STAT_KEY("SharedMemoryEventManagerFragmentsReceived");
22 const std::string artdaq::SharedMemoryEventManager::
23  EVENTS_RELEASED_STAT_KEY("SharedMemoryEventManagerEventsReleased");
24 
25 artdaq::SharedMemoryEventManager::SharedMemoryEventManager(const fhicl::ParameterSet& pset, fhicl::ParameterSet art_pset)
26  : SharedMemoryManager(pset.get<uint32_t>("shared_memory_key", build_key(0xEE000000)),
27  pset.get<size_t>("buffer_count"),
28  pset.has_key("max_event_size_bytes") ? pset.get<size_t>("max_event_size_bytes") : pset.get<size_t>("expected_fragments_per_event") * pset.get<size_t>("max_fragment_size_bytes"),
29  pset.get<size_t>("stale_buffer_timeout_usec", pset.get<size_t>("event_queue_wait_time", 5) * 1000000),
30  !pset.get<bool>("broadcast_mode", false))
31  , num_art_processes_(pset.get<size_t>("art_analyzer_count", 1))
32  , num_fragments_per_event_(pset.get<size_t>("expected_fragments_per_event"))
33  , queue_size_(pset.get<size_t>("buffer_count"))
34  , run_id_(0)
35  , max_subrun_event_map_length_(pset.get<size_t>("max_subrun_lookup_table_size", 100))
36  , max_event_list_length_(pset.get<size_t>("max_event_list_length", 100))
37  , update_run_ids_(pset.get<bool>("update_run_ids_on_new_fragment", true))
38  , use_sequence_id_for_event_number_(pset.get<bool>("use_sequence_id_for_event_number", true))
39  , overwrite_mode_(!pset.get<bool>("use_art", true) || pset.get<bool>("overwrite_mode", false) || pset.get<bool>("broadcast_mode", false))
40  , init_fragment_count_(pset.get<size_t>("init_fragment_count", pset.get<bool>("send_init_fragments", true) ? 1 : 0))
41  , running_(false)
42  , buffer_writes_pending_()
43  , open_event_report_interval_ms_(pset.get<int>("open_event_report_interval_ms", pset.get<int>("incomplete_event_report_interval_ms", -1)))
44  , last_open_event_report_time_(std::chrono::steady_clock::now())
45  , last_backpressure_report_time_(std::chrono::steady_clock::now())
46  , last_fragment_header_write_time_(std::chrono::steady_clock::now())
47  , event_timing_(pset.get<size_t>("buffer_count"))
48  , broadcast_timeout_ms_(pset.get<int>("fragment_broadcast_timeout_ms", 3000))
49  , run_event_count_(0)
50  , run_incomplete_event_count_(0)
51  , subrun_event_count_(0)
52  , subrun_incomplete_event_count_(0)
53  , oversize_fragment_count_(0)
54  , maximum_oversize_fragment_count_(pset.get<int>("maximum_oversize_fragment_count", 1))
55  , restart_art_(false)
56  , always_restart_art_(pset.get<bool>("restart_crashed_art_processes", true))
57  , manual_art_(pset.get<bool>("manual_art", false))
58  , current_art_pset_(art_pset)
59  , art_cmdline_(pset.get<std::string>("art_command_line", "art -c #CONFIG_FILE#"))
60  , art_process_index_offset_(pset.get<size_t>("art_index_offset", 0))
61  , minimum_art_lifetime_s_(pset.get<double>("minimum_art_lifetime_s", 2.0))
62  , art_event_processing_time_us_(pset.get<size_t>("expected_art_event_processing_time_us", 1000000))
63  , requests_(nullptr)
64  , tokens_(nullptr)
65  , data_pset_(pset)
66  , broadcasts_(pset.get<uint32_t>("broadcast_shared_memory_key", build_key(0xBB000000)),
67  pset.get<size_t>("broadcast_buffer_count", 10),
68  pset.get<size_t>("broadcast_buffer_size", 0x100000),
69  pset.get<int>("expected_art_event_processing_time_us", 100000) * pset.get<size_t>("buffer_count"), false)
70 {
71  subrun_event_map_[0] = 1;
72  SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
73  broadcasts_.SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
74 
75  if (!pset.get<bool>("use_art", true))
76  {
77  TLOG(TLVL_INFO) << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false";
78  num_art_processes_ = 0;
79  }
80  else
81  {
82  TLOG(TLVL_INFO) << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true";
83  TLOG(TLVL_DEBUG + 33) << "art_pset is " << art_pset.to_string();
84  }
85 
86  if (manual_art_)
87  current_art_config_file_ = std::make_shared<art_config_file>(art_pset, GetKey(), GetBroadcastKey());
88  else
89  current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
90 
91  if (overwrite_mode_ && num_art_processes_ > 0)
92  {
93  TLOG(TLVL_WARNING) << "Art is configured to run, but overwrite mode is enabled! Check your configuration if this is unintentional!";
94  }
95  else if (overwrite_mode_)
96  {
97  TLOG(TLVL_INFO) << "Overwrite Mode enabled, no configured art processes at startup";
98  }
99 
100  for (size_t ii = 0; ii < size(); ++ii)
101  {
102  buffer_writes_pending_[ii] = 0;
103  // Make sure the mutexes are created once
104  std::lock_guard<std::mutex> lk(buffer_mutexes_[ii]);
105  }
106 
107  if (!IsValid())
108  {
109  throw cet::exception(app_name + "_SharedMemoryEventManager") << "Unable to attach to Shared Memory!"; // NOLINT(cert-err60-cpp)
110  }
111 
112  TLOG(TLVL_DEBUG + 33) << "Setting Writer rank to " << my_rank;
113  SetRank(my_rank);
114  TLOG(TLVL_DEBUG + 32) << "Writer Rank is " << GetRank();
115 
118 
119  // fetch the monitoring parameters and create the MonitoredQuantity instances
120  statsHelper_.createCollectors(pset, 100, 30.0, 60.0, EVENTS_RELEASED_STAT_KEY);
121 
122  TLOG(TLVL_DEBUG + 33) << "END CONSTRUCTOR";
123 }
124 
125 artdaq::SharedMemoryEventManager::~SharedMemoryEventManager()
126 {
127  TLOG(TLVL_DEBUG + 33) << "DESTRUCTOR";
128  if (running_)
129  {
130  try
131  {
132  endOfData();
133  }
134  catch (...)
135  {
136  // IGNORED
137  }
138  }
139  TLOG(TLVL_DEBUG + 33) << "Destructor END";
140 }
141 
142 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag, void* dataPtr)
143 {
144  if (!running_) return true;
145 
146  TLOG(TLVL_DEBUG + 33) << "AddFragment(Header, ptr) BEGIN frag.word_count=" << frag.word_count
147  << ", sequence_id=" << frag.sequence_id;
148  auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
149  TLOG(TLVL_DEBUG + 33) << "Using buffer " << buffer << " for seqid=" << frag.sequence_id;
150  if (buffer == -1)
151  {
152  return false;
153  }
154  if (buffer == -2)
155  {
156  TLOG(TLVL_ERROR) << "Dropping event because data taking has already passed this event number: " << frag.sequence_id;
157  return true;
158  }
159 
160  auto hdr = getEventHeader_(buffer);
161  if (update_run_ids_)
162  {
163  hdr->run_id = run_id_;
164  }
165  hdr->subrun_id = GetSubrunForSequenceID(frag.sequence_id);
166 
167  TLOG(TLVL_DEBUG + 33) << "AddFragment before Write calls";
168  Write(buffer, dataPtr, frag.word_count * sizeof(RawDataType));
169 
170  TLOG(TLVL_DEBUG + 33) << "Checking for complete event";
171  auto fragmentCount = GetFragmentCount(frag.sequence_id);
172  hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
173  TLOG(TLVL_DEBUG + 33) << "hdr->is_complete=" << std::boolalpha << hdr->is_complete
174  << ", fragmentCount=" << fragmentCount
175  << ", num_fragments_per_event=" << num_fragments_per_event_
176  << ", buffer_writes_pending_[buffer]=" << buffer_writes_pending_[buffer];
177 
178  complete_buffer_(buffer);
179  if (requests_)
180  {
181  requests_->SendRequest(true);
182  }
183 
184  TLOG(TLVL_DEBUG + 33) << "AddFragment END";
185  statsHelper_.addSample(FRAGMENTS_RECEIVED_STAT_KEY, frag.word_count * sizeof(RawDataType));
186  return true;
187 }
188 
189 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag)
190 {
191  TLOG(TLVL_DEBUG + 33) << "AddFragment(FragmentPtr) BEGIN";
192  auto hdr = *reinterpret_cast<detail::RawFragmentHeader*>(frag->headerAddress()); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
193  auto data = frag->headerAddress();
194  auto start = std::chrono::steady_clock::now();
195  bool sts = false;
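 // Retry the header-and-data write until it succeeds or timeout_usec elapses; if it never
 // succeeds, hand the Fragment back to the caller via outfrag.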
196  while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
197  {
198  sts = AddFragment(hdr, data);
199  if (!sts)
200  {
201  usleep(1000);
202  }
203  }
204  if (!sts)
205  {
206  outfrag = std::move(frag);
207  }
208  TLOG(TLVL_DEBUG + 33) << "AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts;
209  return sts;
210 }
211 
212 artdaq::RawDataType* artdaq::SharedMemoryEventManager::WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable)
213 {
214  if (!running_) return nullptr;
215  TLOG(TLVL_DEBUG + 34) << "WriteFragmentHeader BEGIN";
216  auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
217 
218  if (buffer < 0)
219  {
220  if (buffer == -1 && !dropIfNoBuffersAvailable)
221  {
222  std::unique_lock<std::mutex> bp_lk(sequence_id_mutex_);
223  if (TimeUtils::GetElapsedTime(last_backpressure_report_time_) > 1.0)
224  {
225  TLOG(TLVL_WARNING) << app_name << ": Back-pressure condition: All Shared Memory buffers have been full for " << TimeUtils::GetElapsedTime(last_fragment_header_write_time_) << " s!";
226  last_backpressure_report_time_ = std::chrono::steady_clock::now();
227  }
228  if (metricMan)
229  {
230  metricMan->sendMetric("Back-pressure wait time", TimeUtils::GetElapsedTime(last_fragment_header_write_time_), "s", 1, MetricMode::LastPoint);
231  }
232  return nullptr;
233  }
234  if (buffer == -2)
235  {
236  TLOG(TLVL_ERROR) << "Dropping fragment with sequence id " << frag.sequence_id << " and fragment id " << frag.fragment_id << " because data taking has already passed this event.";
237  }
238  else
239  {
240  TLOG(TLVL_INFO) << "Dropping fragment with sequence id " << frag.sequence_id << " and fragment id " << frag.fragment_id << " because there is no room in the queue and reliable mode is off.";
241  }
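 // No usable buffer: give the caller a pointer into a temporary Fragment held in dropped_data_
 // so it can finish its write; the payload written there is never forwarded to art.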
242  dropped_data_.emplace_back(frag, std::make_unique<Fragment>(frag.word_count - frag.num_words()));
243  auto it = dropped_data_.rbegin();
244 
245  TLOG(TLVL_DEBUG + 35) << "Dropping fragment with sequence id " << frag.sequence_id << " and fragment id " << frag.fragment_id << " into "
246  << static_cast<void*>(it->second->dataBegin()) << " sz=" << it->second->dataSizeBytes();
247 
248  return it->second->dataBegin();
249  }
250 
251  last_backpressure_report_time_ = std::chrono::steady_clock::now();
252  last_fragment_header_write_time_ = std::chrono::steady_clock::now();
253  // Increment this as soon as we know we want to use the buffer
254  buffer_writes_pending_[buffer]++;
255 
256  if (metricMan)
257  {
258  metricMan->sendMetric("Input Fragment Rate", 1, "Fragments/s", 1, MetricMode::Rate);
259  }
260 
261  TLOG(TLVL_BUFLCK) << "WriteFragmentHeader: obtaining buffer_mutexes lock for buffer " << buffer;
262 
263  std::unique_lock<std::mutex> lk(buffer_mutexes_.at(buffer));
264 
265  TLOG(TLVL_BUFLCK) << "WriteFragmentHeader: obtained buffer_mutexes lock for buffer " << buffer;
266 
267  auto hdrpos = reinterpret_cast<RawDataType*>(GetWritePos(buffer)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
268  Write(buffer, &frag, frag.num_words() * sizeof(RawDataType));
269 
270  auto pos = reinterpret_cast<RawDataType*>(GetWritePos(buffer)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
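 // If the payload will not fit in the remaining buffer space, keep the header (marked as
 // InvalidFragmentType) and redirect the payload to a throwaway Fragment in dropped_data_;
 // too many such over-size Fragments is treated as a fatal configuration error.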
271  if (frag.word_count - frag.num_words() > 0)
272  {
273  auto sts = IncrementWritePos(buffer, (frag.word_count - frag.num_words()) * sizeof(RawDataType));
274 
275  if (!sts)
276  {
277  reinterpret_cast<detail::RawFragmentHeader*>(hdrpos)->word_count = frag.num_words(); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
278  reinterpret_cast<detail::RawFragmentHeader*>(hdrpos)->type = Fragment::InvalidFragmentType; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
279  TLOG(TLVL_ERROR) << "Dropping over-size fragment with sequence id " << frag.sequence_id << " and fragment id " << frag.fragment_id << " because there is no room in the current buffer for this Fragment! (Keeping header)";
280  dropped_data_.emplace_back(frag, std::make_unique<Fragment>(frag.word_count - frag.num_words()));
281  auto it = dropped_data_.rbegin();
282 
283  oversize_fragment_count_++;
284 
285  if (maximum_oversize_fragment_count_ > 0 && oversize_fragment_count_ >= maximum_oversize_fragment_count_)
286  {
287  throw cet::exception("Too many over-size Fragments received! Please adjust max_event_size_bytes or max_fragment_size_bytes!");
288  }
289 
290  TLOG(TLVL_DEBUG + 35) << "Dropping over-size fragment with sequence id " << frag.sequence_id << " and fragment id " << frag.fragment_id
291  << " into " << static_cast<void*>(it->second->dataBegin());
292  return it->second->dataBegin();
293  }
294  }
295  TLOG(TLVL_DEBUG + 34) << "WriteFragmentHeader END";
296  return pos;
297 }
298 
299 void artdaq::SharedMemoryEventManager::DoneWritingFragment(detail::RawFragmentHeader frag)
300 {
301  TLOG(TLVL_DEBUG + 33) << "DoneWritingFragment BEGIN";
302 
303  auto buffer = getBufferForSequenceID_(frag.sequence_id, false, frag.timestamp);
304  if (buffer < 0)
305  {
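 // If this Fragment was redirected into a dropped_data_ placeholder (no buffer was available),
 // release that placeholder now.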
306  for (auto it = dropped_data_.begin(); it != dropped_data_.end(); ++it)
307  {
308  if (it->first == frag)
309  {
310  dropped_data_.erase(it);
311  return;
312  }
313  }
314  if (buffer == -1)
315  {
316  Detach(true, "SharedMemoryEventManager",
317  "getBufferForSequenceID_ returned -1 in DoneWritingFragment. This indicates a possible mismatch between expected Fragment count and the actual number of Fragments received.");
318  }
319  return;
320  }
321 
322  if (!frag.valid)
323  {
324  UpdateFragmentHeader(buffer, frag);
325  }
326 
327  statsHelper_.addSample(FRAGMENTS_RECEIVED_STAT_KEY, frag.word_count * sizeof(RawDataType));
328  {
329  TLOG(TLVL_BUFLCK) << "DoneWritingFragment: obtaining buffer_mutexes lock for buffer " << buffer;
330 
331  std::unique_lock<std::mutex> lk(buffer_mutexes_.at(buffer));
332 
333  TLOG(TLVL_BUFLCK) << "DoneWritingFragment: obtained buffer_mutexes lock for buffer " << buffer;
334 
335  TLOG(TLVL_DEBUG + 32) << "DoneWritingFragment: Received Fragment with sequence ID " << frag.sequence_id << " and fragment id " << frag.fragment_id << " (type " << static_cast<int>(frag.type) << ")";
336  auto hdr = getEventHeader_(buffer);
337  if (update_run_ids_)
338  {
339  hdr->run_id = run_id_;
340  }
341  hdr->subrun_id = GetSubrunForSequenceID(frag.sequence_id);
342 
343  TLOG(TLVL_DEBUG + 33) << "DoneWritingFragment: Updating buffer touch time";
344  TouchBuffer(buffer);
345 
346  if (buffer_writes_pending_[buffer] > 1)
347  {
348  TLOG(TLVL_DEBUG + 33) << "Done writing fragment, but there's another writer. Not doing bookkeeping steps.";
349  buffer_writes_pending_[buffer]--;
350  return;
351  }
352  TLOG(TLVL_DEBUG + 33) << "Done writing fragment, and no other writer. Doing bookkeeping steps.";
353  auto frag_count = GetFragmentCount(frag.sequence_id);
354  hdr->is_complete = frag_count >= num_fragments_per_event_;
355 
356  if (frag_count > num_fragments_per_event_)
357  {
358  TLOG(TLVL_WARNING) << "DoneWritingFragment: This Event has more Fragments ( " << frag_count << " ) than specified in configuration ( " << num_fragments_per_event_ << " )!"
359  << " This is probably due to a misconfiguration and is *not* a reliable mode!";
360  }
361 
362  TLOG(TLVL_DEBUG + 33) << "DoneWritingFragment: Received Fragment with sequence ID " << frag.sequence_id << " and fragment id " << frag.fragment_id << ", count/expected = " << frag_count << "/" << num_fragments_per_event_;
363 #if ART_SUPPORTS_DUPLICATE_EVENTS
364  if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id))
365  {
366  hdr->is_complete = frag_count >= released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
367  }
368 #endif
369 
370  complete_buffer_(buffer);
371 
372  // Move this down here to avoid race condition
373  buffer_writes_pending_[buffer]--;
374  }
375  if (requests_)
376  {
377  requests_->SendRequest(true);
378  }
379  TLOG(TLVL_DEBUG + 33) << "DoneWritingFragment END";
380 }
381 
382 size_t artdaq::SharedMemoryEventManager::GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type)
383 {
384  return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID, false), type);
385 }
386 
387 size_t artdaq::SharedMemoryEventManager::GetFragmentCountInBuffer(int buffer, Fragment::type_t type)
388 {
389  if (buffer < 0)
390  {
391  return 0;
392  }
393  ResetReadPos(buffer);
394  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
395 
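 // Walk the buffer Fragment by Fragment using each RawFragmentHeader's word_count, counting
 // those that match the requested type (or all Fragments, if type is InvalidFragmentType).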
396  size_t count = 0;
397 
398  while (MoreDataInBuffer(buffer))
399  {
400  auto fragHdr = reinterpret_cast<artdaq::detail::RawFragmentHeader*>(GetReadPos(buffer)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
401  IncrementReadPos(buffer, fragHdr->word_count * sizeof(RawDataType));
402  if (type != Fragment::InvalidFragmentType && fragHdr->type != type)
403  {
404  continue;
405  }
406  TLOG(TLVL_DEBUG + 33) << "Adding Fragment with size=" << fragHdr->word_count << " to Fragment count";
407  ++count;
408  }
409 
410  return count;
411 }
412 
413 void artdaq::SharedMemoryEventManager::UpdateFragmentHeader(int buffer, artdaq::detail::RawFragmentHeader hdr)
414 {
415  if (buffer < 0)
416  {
417  return;
418  }
419  ResetReadPos(buffer);
420  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
421 
422  while (MoreDataInBuffer(buffer))
423  {
424  auto fragHdr = reinterpret_cast<artdaq::detail::RawFragmentHeader*>(GetReadPos(buffer)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
425  if (hdr.fragment_id == fragHdr->fragment_id)
426  {
427  *fragHdr = hdr;
428  break;
429  }
430  }
431 
432  return;
433 }
434 
435 void artdaq::SharedMemoryEventManager::RunArt(const std::shared_ptr<art_config_file>& config_file, size_t process_index, const std::shared_ptr<std::atomic<pid_t>>& pid_out)
436 {
437  do
438  {
439  auto start_time = std::chrono::steady_clock::now();
440  send_init_frags_();
441  TLOG(TLVL_INFO) << "Starting art process with config file " << config_file->getFileName();
442 
443  pid_t pid = 0;
444 
445  if (!manual_art_)
446  {
447  pid = fork();
448  if (pid == 0)
449  { /* child */
450  // 23-May-2018, KAB: added the setting of the partition number env var
451  // in the environment of the child art process so that Globals.hh
452  // will pick it up there and provide it to the artdaq classes that
453  // are used in data transfers, etc. within the art process.
454  std::string envVarKey = "ARTDAQ_PARTITION_NUMBER";
455  std::string envVarValue = std::to_string(GetPartitionNumber());
456  if (setenv(envVarKey.c_str(), envVarValue.c_str(), 1) != 0)
457  {
458  TLOG(TLVL_ERROR) << "Error setting environment variable \"" << envVarKey
459  << "\" in the environment of a child art process. "
460  << "This may result in incorrect TCP port number "
461  << "assignments or other issues, and data may "
462  << "not flow through the system correctly.";
463  }
464  envVarKey = "ARTDAQ_APPLICATION_NAME";
465  envVarValue = app_name;
466  if (setenv(envVarKey.c_str(), envVarValue.c_str(), 1) != 0)
467  {
468  TLOG(TLVL_DEBUG + 32) << "Error setting environment variable \"" << envVarKey
469  << "\" in the environment of a child art process. ";
470  }
471  envVarKey = "ARTDAQ_RANK";
472  envVarValue = std::to_string(my_rank);
473  if (setenv(envVarKey.c_str(), envVarValue.c_str(), 1) != 0)
474  {
475  TLOG(TLVL_DEBUG + 32) << "Error setting environment variable \"" << envVarKey
476  << "\" in the environment of a child art process. ";
477  }
478 
479  TLOG(TLVL_DEBUG + 33) << "Parsing art command line";
480  auto args = parse_art_command_line_(config_file, process_index);
481 
482  TLOG(TLVL_DEBUG + 33) << "Calling execvp with application name " << args[0];
483  execvp(args[0], &args[0]);
484 
485  TLOG(TLVL_DEBUG + 33) << "Application exited, cleaning up";
486  for (auto& arg : args)
487  {
488  delete[] arg;
489  }
490 
491  exit(1);
492  }
493  }
494  else
495  {
496  // Using cin/cout here to ensure console is active (artdaqDriver)
497  std::cout << "Please run the following command in a separate terminal:" << std::endl
498  << "art -c " << config_file->getFileName() << std::endl
499  << "Then, in a third terminal, execute: \"ps aux|grep [a]rt -c " << config_file->getFileName() << "\" and note the PID of the art process." << std::endl
500  << "Finally, return to this window and enter the pid: " << std::endl;
501  std::cin >> pid;
502  }
503  *pid_out = pid;
504 
505  TLOG(TLVL_INFO) << "PID of new art process is " << pid;
506  {
507  std::unique_lock<std::mutex> lk(art_process_mutex_);
508  art_processes_.insert(pid);
509  }
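 // Parent side: wait for the art process to exit. In manual mode we cannot waitid() on a
 // process we did not fork, so poll with kill(pid, 0) and fake a clean exit status instead.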
510  siginfo_t status;
511  auto sts = 0;
512  if (!manual_art_)
513  {
514  sts = waitid(P_PID, pid, &status, WEXITED);
515  }
516  else
517  {
518  while (kill(pid, 0) >= 0) usleep(10000);
519 
520  TLOG(TLVL_INFO) << "Faking good exit status, please see art process for actual exit status!";
521  status.si_code = CLD_EXITED;
522  status.si_status = 0;
523  }
524  TLOG(TLVL_INFO) << "Removing PID " << pid << " from process list";
525  {
526  std::unique_lock<std::mutex> lk(art_process_mutex_);
527  art_processes_.erase(pid);
528  }
529  if (sts < 0)
530  {
531  TLOG(TLVL_WARNING) << "Error occurred in waitid for art process " << pid << ": " << errno << " (" << strerror(errno) << ").";
532  }
533  else if (status.si_code == CLD_EXITED && status.si_status == 0)
534  {
535  TLOG(TLVL_INFO) << "art process " << pid << " exited normally, " << (restart_art_ ? "restarting" : "not restarting");
536  }
537  else
538  {
539  auto art_lifetime = TimeUtils::GetElapsedTime(start_time);
540  if (art_lifetime < minimum_art_lifetime_s_)
541  {
542  restart_art_ = false;
543  }
544 
545  auto exit_type = "exited with status code";
546  switch (status.si_code)
547  {
548  case CLD_DUMPED:
549  case CLD_KILLED:
550  exit_type = "was killed with signal";
551  break;
552  case CLD_EXITED:
553  default:
554  break;
555  }
556 
557  TLOG((restart_art_ ? TLVL_WARNING : TLVL_ERROR))
558  << "art process " << pid << " " << exit_type << " " << status.si_status
559  << (status.si_code == CLD_DUMPED ? " (core dumped)" : "")
560  << " after running for " << std::setprecision(2) << std::fixed << art_lifetime << " seconds, "
561  << (restart_art_ ? "restarting" : "not restarting");
562  }
563  } while (restart_art_);
564 }
565 
566 void artdaq::SharedMemoryEventManager::StartArt()
567 {
568  restart_art_ = always_restart_art_;
569  if (num_art_processes_ == 0)
570  {
571  return;
572  }
573  for (size_t ii = 0; ii < num_art_processes_; ++ii)
574  {
575  StartArtProcess(current_art_pset_, ii);
576  }
577 }
578 
579 pid_t artdaq::SharedMemoryEventManager::StartArtProcess(fhicl::ParameterSet pset, size_t process_index)
580 {
581  static std::mutex start_art_mutex;
582  std::unique_lock<std::mutex> lk(start_art_mutex);
583  // TraceLock lk(start_art_mutex, 15, "StartArtLock");
584  restart_art_ = always_restart_art_;
585  auto initialCount = GetAttachedCount();
586  auto startTime = std::chrono::steady_clock::now();
587 
588  if (pset != current_art_pset_ || !current_art_config_file_)
589  {
590  current_art_pset_ = pset;
591  if (manual_art_)
592  current_art_config_file_ = std::make_shared<art_config_file>(pset, GetKey(), GetBroadcastKey());
593  else
594  current_art_config_file_ = std::make_shared<art_config_file>(pset);
595  }
596  std::shared_ptr<std::atomic<pid_t>> pid(new std::atomic<pid_t>(-1));
597  boost::thread thread([=] { RunArt(current_art_config_file_, process_index, pid); });
598  thread.detach();
599 
600  auto currentCount = GetAttachedCount() - initialCount;
601  while ((currentCount < 1 || *pid <= 0) && (TimeUtils::GetElapsedTime(startTime) < 5 || manual_art_))
602  {
603  usleep(10000);
604  currentCount = GetAttachedCount() - initialCount;
605  }
606  if ((currentCount < 1 || *pid <= 0) && manual_art_)
607  {
608  TLOG(TLVL_WARNING) << "Manually-started art process has not connected to shared memory or has bad PID: connected:" << currentCount << ", PID:" << *pid;
609  return 0;
610  }
611  if (currentCount < 1 || *pid <= 0)
612  {
613  TLOG(TLVL_WARNING) << "art process has not started after 5s. Check art configuration!"
614  << " (pid=" << *pid << ", attachedCount=" << currentCount << ")";
615  return 0;
616  }
617 
618  TLOG(TLVL_INFO) << std::setw(4) << std::fixed << "art initialization took "
619  << TimeUtils::GetElapsedTime(startTime) << " seconds.";
620 
621  return *pid;
622 }
623 
624 void artdaq::SharedMemoryEventManager::ShutdownArtProcesses(std::set<pid_t>& pids)
625 {
626  restart_art_ = false;
627  // current_art_config_file_ = nullptr;
628  // current_art_pset_ = fhicl::ParameterSet();
629 
630  auto check_pids = [&](bool print) {
631  std::unique_lock<std::mutex> lk(art_process_mutex_);
632  for (auto pid = pids.begin(); pid != pids.end();)
633  {
634  // 08-May-2018, KAB: protect against killing invalid PIDS
635 
636  if (*pid <= 0)
637  {
638  TLOG(TLVL_WARNING) << "Removing an invalid PID (" << *pid
639  << ") from the shutdown list.";
640  pid = pids.erase(pid);
641  }
642  else if (kill(*pid, 0) < 0)
643  {
644  pid = pids.erase(pid);
645  }
646  else
647  {
648  if (print)
649  {
650  std::cout << *pid << " ";
651  }
652  ++pid;
653  }
654  }
655  };
656  auto count_pids = [&]() {
657  std::unique_lock<std::mutex> lk(art_process_mutex_);
658  return pids.size();
659  };
660  check_pids(false);
661  if (count_pids() == 0)
662  {
663  TLOG(TLVL_DEBUG + 34) << "All art processes already exited, nothing to do.";
664  usleep(1000);
665  return;
666  }
667 
668  if (!manual_art_)
669  {
670  int graceful_wait_ms = art_event_processing_time_us_ * size() * 10 / 1000;
671  int gentle_wait_ms = art_event_processing_time_us_ * size() * 2 / 1000;
672  int int_wait_ms = art_event_processing_time_us_ * size() / 1000;
673  auto shutdown_start = std::chrono::steady_clock::now();
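 // Escalating shutdown: first wait for a graceful exit, then send SIGQUIT, then SIGINT, and
 // finally SIGKILL, with each wait scaled to the expected art event processing time and buffer count.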
674 
675  // if (!overwrite_mode_)
676  {
677  TLOG(TLVL_DEBUG + 33) << "Waiting up to " << graceful_wait_ms << " ms for all art processes to exit gracefully";
678  for (int ii = 0; ii < graceful_wait_ms; ++ii)
679  {
680  usleep(1000);
681 
682  check_pids(false);
683  if (count_pids() == 0)
684  {
685  TLOG(TLVL_INFO) << "All art processes exited after " << TimeUtils::GetElapsedTimeMilliseconds(shutdown_start) << " ms.";
686  return;
687  }
688  }
689  }
690 
691  {
692  TLOG(TLVL_DEBUG + 33) << "Gently informing art processes that it is time to shut down";
693  std::unique_lock<std::mutex> lk(art_process_mutex_);
694  for (auto pid : pids)
695  {
696  TLOG(TLVL_DEBUG + 33) << "Sending SIGQUIT to pid " << pid;
697  kill(pid, SIGQUIT);
698  }
699  }
700 
701  TLOG(TLVL_DEBUG + 33) << "Waiting up to " << gentle_wait_ms << " ms for all art processes to exit from SIGQUIT";
702  for (int ii = 0; ii < gentle_wait_ms; ++ii)
703  {
704  usleep(1000);
705 
706  check_pids(false);
707  if (count_pids() == 0)
708  {
709  TLOG(TLVL_INFO) << "All art processes exited after " << TimeUtils::GetElapsedTimeMilliseconds(shutdown_start) << " ms (SIGQUIT).";
710  return;
711  }
712  }
713 
714  {
715  TLOG(TLVL_DEBUG + 33) << "Insisting that the art processes shut down";
716  std::unique_lock<std::mutex> lk(art_process_mutex_);
717  for (auto pid : pids)
718  {
719  kill(pid, SIGINT);
720  }
721  }
722 
723  TLOG(TLVL_DEBUG + 33) << "Waiting up to " << int_wait_ms << " ms for all art processes to exit from SIGINT";
724  for (int ii = 0; ii < int_wait_ms; ++ii)
725  {
726  usleep(1000);
727 
728  check_pids(false);
729 
730  if (count_pids() == 0)
731  {
732  TLOG(TLVL_INFO) << "All art processes exited after " << TimeUtils::GetElapsedTimeMilliseconds(shutdown_start) << " ms (SIGINT).";
733  return;
734  }
735  }
736 
737  TLOG(TLVL_DEBUG + 33) << "Killing remaining art processes with extreme prejudice";
738  while (count_pids() > 0)
739  {
740  {
741  std::unique_lock<std::mutex> lk(art_process_mutex_);
742  kill(*pids.begin(), SIGKILL);
743  usleep(1000);
744  }
745  check_pids(false);
746  }
747  TLOG(TLVL_INFO) << "All art processes exited after " << TimeUtils::GetElapsedTimeMilliseconds(shutdown_start) << " ms (SIGKILL).";
748  }
749  else
750  {
751  std::cout << "Please shut down all art processes, then hit return/enter" << std::endl;
752  while (count_pids() > 0)
753  {
754  std::cout << "The following PIDs are running: ";
755  check_pids(true);
756  std::cout << std::endl;
757  usleep(500000);
758  }
759  }
760 }
761 
762 void artdaq::SharedMemoryEventManager::ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun, int n_art_processes)
763 {
764  TLOG(TLVL_DEBUG + 32) << "ReconfigureArt BEGIN";
765  if (restart_art_ || !always_restart_art_) // Art is running
766  {
767  endOfData();
768  }
769  for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
770  {
771  broadcasts_.MarkBufferEmpty(ii, true);
772  }
773  if (newRun == 0)
774  {
775  newRun = run_id_ + 1;
776  }
777 
778  if (art_pset != current_art_pset_ || !current_art_config_file_)
779  {
780  current_art_pset_ = art_pset;
781  if (manual_art_)
782  current_art_config_file_ = std::make_shared<art_config_file>(art_pset, GetKey(), GetBroadcastKey());
783  else
784  current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
785  }
786 
787  if (n_art_processes != -1)
788  {
789  TLOG(TLVL_INFO) << "Setting number of art processes to " << n_art_processes;
790  num_art_processes_ = n_art_processes;
791  }
792  startRun(newRun);
793  TLOG(TLVL_DEBUG + 32) << "ReconfigureArt END";
794 }
795 
796 bool artdaq::SharedMemoryEventManager::endOfData()
797 {
798  running_ = false;
799  init_fragments_.clear();
800  received_init_frags_.clear();
801  TLOG(TLVL_DEBUG + 32) << "SharedMemoryEventManager::endOfData";
802  restart_art_ = false;
803 
804  auto start = std::chrono::steady_clock::now();
805  auto pendingWriteCount = std::accumulate(buffer_writes_pending_.begin(), buffer_writes_pending_.end(), 0, [](int a, auto& b) { return a + b.second.load(); });
806  TLOG(TLVL_DEBUG + 32) << "endOfData: Waiting for " << pendingWriteCount << " pending writes to complete";
807  while (pendingWriteCount > 0 && TimeUtils::GetElapsedTimeMicroseconds(start) < 1000000)
808  {
809  usleep(10000);
810  pendingWriteCount = std::accumulate(buffer_writes_pending_.begin(), buffer_writes_pending_.end(), 0, [](int a, auto& b) { return a + b.second.load(); });
811  }
812 
813  size_t initialStoreSize = GetOpenEventCount();
814  TLOG(TLVL_DEBUG + 32) << "endOfData: Flushing " << initialStoreSize
815  << " stale events from the SharedMemoryEventManager.";
816  int counter = initialStoreSize;
817  while (!active_buffers_.empty() && counter > 0)
818  {
819  complete_buffer_(*active_buffers_.begin());
820  counter--;
821  }
822  TLOG(TLVL_DEBUG + 32) << "endOfData: Done flushing, there are now " << GetOpenEventCount()
823  << " stale events in the SharedMemoryEventManager.";
824 
825  TLOG(TLVL_DEBUG + 32) << "Waiting for " << (ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) << " outstanding buffers...";
826  start = std::chrono::steady_clock::now();
827  auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
828  auto end_of_data_wait_us = art_event_processing_time_us_ * (lastReadCount > 0 ? lastReadCount : 1); // size();
829 
830  auto outstanding_buffer_wait_time = art_event_processing_time_us_ > 100000 ? 100000 : art_event_processing_time_us_;
831 
832  // Wait until no buffer has been read for the end-of-data wait period, or until no art processes remain.
833  while (lastReadCount > 0 && (end_of_data_wait_us == 0 || TimeUtils::GetElapsedTimeMicroseconds(start) < end_of_data_wait_us) && get_art_process_count_() > 0)
834  {
835  auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
836  if (temp != lastReadCount)
837  {
838  TLOG(TLVL_DEBUG + 33) << "Waiting for " << temp << " outstanding buffers...";
839  lastReadCount = temp;
840  start = std::chrono::steady_clock::now();
841  }
842  if (lastReadCount > 0)
843  {
844  TLOG(TLVL_DEBUG + 38) << "About to sleep " << outstanding_buffer_wait_time << " us - lastReadCount=" << lastReadCount << " size=" << size() << " end_of_data_wait_us=" << end_of_data_wait_us;
845  usleep(outstanding_buffer_wait_time);
846  }
847  }
848 
849  TLOG(TLVL_DEBUG + 32) << "endOfData: After wait for outstanding buffers. Still outstanding: " << lastReadCount << ", time waited: "
850  << TimeUtils::GetElapsedTime(start) << " s / " << (end_of_data_wait_us / 1000000.0) << " s, art process count: " << get_art_process_count_();
851 
852  TLOG(TLVL_DEBUG + 32) << "endOfData: Broadcasting EndOfData Fragment";
853  FragmentPtrs broadcast;
854  broadcast.emplace_back(Fragment::eodFrag(GetBufferCount()));
855  bool success = broadcastFragments_(broadcast);
856  if (!success)
857  {
858  TLOG(TLVL_DEBUG + 32) << "endOfData: Clearing buffers to make room for EndOfData Fragment";
859  for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
860  {
861  broadcasts_.MarkBufferEmpty(ii, true);
862  }
863  broadcastFragments_(broadcast);
864  }
865  auto endOfDataProcessingStart = std::chrono::steady_clock::now();
866  while (get_art_process_count_() > 0)
867  {
868  TLOG(TLVL_DEBUG + 32) << "There are " << get_art_process_count_() << " art processes remaining. Proceeding to shutdown.";
869 
870  ShutdownArtProcesses(art_processes_);
871  }
872  TLOG(TLVL_DEBUG + 32) << "It took " << TimeUtils::GetElapsedTime(endOfDataProcessingStart) << " s for all art processes to close after sending EndOfData Fragment";
873 
874  ResetAttachedCount();
875 
876  TLOG(TLVL_DEBUG + 32) << "endOfData: Clearing buffers";
877  for (size_t ii = 0; ii < size(); ++ii)
878  {
879  MarkBufferEmpty(ii, true);
880  }
881  // ELF 06/04/2018: Cannot clear broadcasts here, we want the EndOfDataFragment to persist until it's time to start art again...
882  // TLOG(TLVL_DEBUG + 33) << "endOfData: Clearing broadcast buffers";
883  // for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
884  // {
885  // broadcasts_.MarkBufferEmpty(ii, true);
886  // }
887  released_events_.clear();
888  released_incomplete_events_.clear();
889 
890  TLOG(TLVL_DEBUG + 32) << "endOfData END";
891  TLOG(TLVL_INFO) << "EndOfData Complete. There were " << GetLastSeenBufferID() << " buffers processed.";
892  return true;
893 }
894 
895 void artdaq::SharedMemoryEventManager::startRun(run_id_t runID)
896 {
897  running_ = true;
898  init_fragments_.clear();
899  received_init_frags_.clear();
900  statsHelper_.resetStatistics();
901  TLOG(TLVL_DEBUG + 33) << "startRun: Clearing broadcast buffers";
902  for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
903  {
904  broadcasts_.MarkBufferEmpty(ii, true);
905  }
906  released_events_.clear();
907  released_incomplete_events_.clear();
908  StartArt();
909  run_id_ = runID;
910  {
911  std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
912  subrun_event_map_.clear();
913  subrun_event_map_[0] = 1;
914  }
915  run_event_count_ = 0;
916  run_incomplete_event_count_ = 0;
917  requests_ = std::make_unique<RequestSender>(data_pset_);
918  if (requests_)
919  {
920  requests_->SetRunNumber(static_cast<uint32_t>(run_id_));
921  }
922  if (data_pset_.has_key("routing_token_config"))
923  {
924  auto rmPset = data_pset_.get<fhicl::ParameterSet>("routing_token_config");
925  if (rmPset.get<bool>("use_routing_manager", false))
926  {
927  tokens_ = std::make_unique<TokenSender>(rmPset);
928  tokens_->SetRunNumber(static_cast<uint32_t>(run_id_));
929  tokens_->SendRoutingToken(queue_size_, run_id_);
930  }
931  }
932  TLOG(TLVL_DEBUG + 32) << "Starting run " << run_id_
933  << ", max queue size = "
934  << queue_size_
935  << ", queue size = "
936  << GetLockedBufferCount();
937  if (metricMan)
938  {
939  metricMan->sendMetric("Run Number", static_cast<uint64_t>(run_id_), "Run", 1, MetricMode::LastPoint | MetricMode::Persist);
940  }
941 }
942 
943 bool artdaq::SharedMemoryEventManager::endRun()
944 {
945  TLOG(TLVL_INFO) << "Ending run " << run_id_;
946  FragmentPtr endOfRunFrag(new Fragment(static_cast<size_t>(ceil(sizeof(my_rank) /
947  static_cast<double>(sizeof(Fragment::value_type))))));
948 
949  TLOG(TLVL_DEBUG + 32) << "Shutting down RequestSender";
950  requests_.reset(nullptr);
951  TLOG(TLVL_DEBUG + 32) << "Shutting down TokenSender";
952  tokens_.reset(nullptr);
953 
954  TLOG(TLVL_DEBUG + 32) << "Broadcasting EndOfRun Fragment";
955  endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
956  *endOfRunFrag->dataBegin() = my_rank;
957  FragmentPtrs broadcast;
958  broadcast.emplace_back(std::move(endOfRunFrag));
959  broadcastFragments_(broadcast);
960 
961  TLOG(TLVL_INFO) << "Run " << run_id_ << " has ended. There were " << run_event_count_ << " events in this run.";
962  run_event_count_ = 0;
963  run_incomplete_event_count_ = 0;
964  oversize_fragment_count_ = 0;
965  {
966  std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
967  subrun_event_map_.clear();
968  subrun_event_map_[0] = 1;
969  }
970  return true;
971 }
972 
973 void artdaq::SharedMemoryEventManager::rolloverSubrun(Fragment::sequence_id_t boundary, subrun_id_t subrun)
974 {
975  // Generated EndOfSubrun Fragments have Sequence ID 0 and should be ignored
976  if (boundary == 0 || boundary == Fragment::InvalidSequenceID)
977  {
978  return;
979  }
980 
981  std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
982 
983  // Don't re-rollover to an already-defined subrun
984  if (!subrun_event_map_.empty() && subrun_event_map_.rbegin()->second == subrun)
985  {
986  return;
987  }
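 // Record the boundary: events with sequence IDs at or above 'boundary' belong to 'subrun'.
 // GetSubrunForSequenceID() walks this map to assign subrun numbers, and the map is pruned
 // to at most max_subrun_event_map_length_ entries.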
988  TLOG(TLVL_INFO) << "Will roll over to subrun " << subrun << " when I reach Sequence ID " << boundary;
989  subrun_event_map_[boundary] = subrun;
990  while (subrun_event_map_.size() > max_subrun_event_map_length_)
991  {
992  subrun_event_map_.erase(subrun_event_map_.begin());
993  }
994 }
995 
996 void artdaq::SharedMemoryEventManager::rolloverSubrun()
997 {
998  Fragment::sequence_id_t seqID = 0;
999  subrun_id_t subrun = 0;
1000  {
1001  std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
1002  for (auto& it : subrun_event_map_)
1003  {
1004  if (it.first >= seqID)
1005  {
1006  seqID = it.first + 1;
1007  }
1008  if (it.second >= subrun)
1009  {
1010  subrun = it.second + 1;
1011  }
1012  }
1013  }
1014  rolloverSubrun(seqID, subrun);
1015 }
1016 
1017 void artdaq::SharedMemoryEventManager::sendMetrics()
1018 {
1019  if (metricMan)
1020  {
1021  metricMan->sendMetric("Open Event Count", GetOpenEventCount(), "events", 1, MetricMode::LastPoint);
1022  metricMan->sendMetric("Pending Event Count", GetPendingEventCount(), "events", 1, MetricMode::LastPoint);
1023  }
1024 
1025  if (open_event_report_interval_ms_ > 0 && GetLockedBufferCount() != 0u)
1026  {
1027  if (TimeUtils::GetElapsedTimeMilliseconds(last_open_event_report_time_) < static_cast<size_t>(open_event_report_interval_ms_))
1028  {
1029  return;
1030  }
1031 
1032  last_open_event_report_time_ = std::chrono::steady_clock::now();
1033  std::ostringstream oss;
1034  oss << "Open Events (expecting " << num_fragments_per_event_ << " Fragments): ";
1035  for (auto& ev : active_buffers_)
1036  {
1037  auto hdr = getEventHeader_(ev);
1038  oss << hdr->sequence_id << " (has " << GetFragmentCount(hdr->sequence_id) << " Fragments), ";
1039  }
1040  TLOG(TLVL_DEBUG + 32) << oss.str();
1041  }
1042 }
1043 
1044 bool artdaq::SharedMemoryEventManager::broadcastFragments_(FragmentPtrs& frags)
1045 {
1046  if (frags.empty())
1047  {
1048  TLOG(TLVL_ERROR) << "Requested broadcast but no Fragments given!";
1049  return false;
1050  }
1051  if (!broadcasts_.IsValid())
1052  {
1053  TLOG(TLVL_ERROR) << "Broadcast attempted but broadcast shared memory is unavailable!";
1054  return false;
1055  }
1056  TLOG(TLVL_DEBUG + 32) << "Broadcasting Fragments with seqID=" << frags.front()->sequenceID()
1057  << ", type " << detail::RawFragmentHeader::SystemTypeToString(frags.front()->type())
1058  << ", size=" << frags.front()->sizeBytes() << "B.";
1059  auto buffer = broadcasts_.GetBufferForWriting(false);
1060  TLOG(TLVL_DEBUG + 32) << "broadcastFragments_: after getting buffer 1st buffer=" << buffer;
1061  auto start_time = std::chrono::steady_clock::now();
1062  while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
1063  {
1064  usleep(10000);
1065  buffer = broadcasts_.GetBufferForWriting(false);
1066  }
1067  TLOG(TLVL_DEBUG + 32) << "broadcastFragments_: after getting buffer w/timeout, buffer=" << buffer << ", elapsed time=" << TimeUtils::GetElapsedTime(start_time) << " s.";
1068  if (buffer == -1)
1069  {
1070  TLOG(TLVL_ERROR) << "Broadcast of fragment type " << frags.front()->typeString() << " failed due to timeout waiting for buffer!";
1071  return false;
1072  }
1073 
1074  TLOG(TLVL_DEBUG + 32) << "broadcastFragments_: Filling in RawEventHeader";
1075  auto hdr = reinterpret_cast<detail::RawEventHeader*>(broadcasts_.GetBufferStart(buffer)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
1076  hdr->run_id = run_id_;
1077  hdr->subrun_id = GetSubrunForSequenceID(frags.front()->sequenceID());
1078  hdr->sequence_id = frags.front()->sequenceID();
1079  hdr->is_complete = true;
1080  broadcasts_.IncrementWritePos(buffer, sizeof(detail::RawEventHeader));
1081 
1082  for (auto& frag : frags)
1083  {
1084  TLOG(TLVL_DEBUG + 32) << "broadcastFragments_ before Write calls";
1085  if (frag->sequenceID() != hdr->sequence_id || frag->type() != frags.front()->type())
1086  {
1087  TLOG(TLVL_WARNING) << "Not sending fragment because its SequenceID or Type disagrees with leading Fragment";
1088  continue;
1089  }
1090  broadcasts_.Write(buffer, frag->headerAddress(), frag->size() * sizeof(RawDataType));
1091  }
1092 
1093  TLOG(TLVL_DEBUG + 32) << "broadcastFragments_ Marking buffer full";
1094  broadcasts_.MarkBufferFull(buffer, -1);
1095  TLOG(TLVL_DEBUG + 32) << "broadcastFragments_ Complete";
1096  return true;
1097 }
1098 
1099 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(int buffer)
1100 {
1101  return reinterpret_cast<detail::RawEventHeader*>(GetBufferStart(buffer)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
1102 }
1103 
1104 artdaq::SharedMemoryEventManager::subrun_id_t artdaq::SharedMemoryEventManager::GetSubrunForSequenceID(Fragment::sequence_id_t seqID)
1105 {
1106  std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
1107 
1108  TLOG(TLVL_DEBUG + 33) << "GetSubrunForSequenceID BEGIN map size = " << subrun_event_map_.size();
1109  auto it = subrun_event_map_.begin();
1110  subrun_id_t subrun = 1;
1111 
1112  while (it != subrun_event_map_.end() && it->first <= seqID)
1113  {
1114  TLOG(TLVL_DEBUG + 33) << "Map has sequence ID " << it->first << ", subrun " << it->second << " (looking for <= " << seqID << ")";
1115  subrun = it->second;
1116  ++it;
1117  }
1118 
1119  TLOG(TLVL_DEBUG + 32) << "GetSubrunForSequenceID returning subrun " << subrun << " for sequence ID " << seqID;
1120  return subrun;
1121 }
1122 
1123 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp)
1124 {
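 // Look up (or, if create_new is set, allocate and initialize) the shared memory buffer for this
 // sequence ID. Returns -1 if no buffer is available and -2 if the event was already released to art.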
1125  TLOG(TLVL_DEBUG + 34) << "getBufferForSequenceID " << seqID << " BEGIN";
1126  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
1127 
1128  TLOG(TLVL_DEBUG + 34) << "getBufferForSequenceID obtained sequence_id_mutex for seqid=" << seqID;
1129 
1130  auto buffers = GetBuffersOwnedByManager();
1131  for (auto& buf : buffers)
1132  {
1133  auto hdr = getEventHeader_(buf);
1134  if (hdr->sequence_id == seqID)
1135  {
1136  TLOG(TLVL_DEBUG + 34) << "getBufferForSequenceID " << seqID << " returning " << buf;
1137  return buf;
1138  }
1139  }
1140 
1141 #if !ART_SUPPORTS_DUPLICATE_EVENTS
1142  if (released_incomplete_events_.count(seqID) != 0u)
1143  {
1144  TLOG(TLVL_ERROR) << "Event " << seqID << " has already been marked \"Incomplete\" and sent to art!";
1145  return -2;
1146  }
1147  if (released_events_.count(seqID) != 0u)
1148  {
1149  TLOG(TLVL_ERROR) << "Event " << seqID << " has already been completed and released to art! Check configuration for inconsistent Fragment count per event!";
1150  return -2;
1151  }
1152 #endif
1153 
1154  if (!create_new)
1155  {
1156  return -1;
1157  }
1158 
1159  check_pending_buffers_(lk);
1160  int new_buffer = GetBufferForWriting(false);
1161 
1162  if (new_buffer == -1)
1163  {
1164  new_buffer = GetBufferForWriting(overwrite_mode_);
1165  }
1166 
1167  if (new_buffer == -1)
1168  {
1169  return -1;
1170  }
1171  TLOG(TLVL_BUFLCK) << "getBufferForSequenceID_: obtaining buffer_mutexes lock for buffer " << new_buffer;
1172  std::unique_lock<std::mutex> buffer_lk(buffer_mutexes_.at(new_buffer));
1173  TLOG(TLVL_BUFLCK) << "getBufferForSequenceID_: obtained buffer_mutexes lock for buffer " << new_buffer;
1174 
1175  event_timing_[new_buffer] = std::chrono::steady_clock::now();
1176 
1177  auto hdr = getEventHeader_(new_buffer);
1178  hdr->is_complete = false;
1179  hdr->run_id = run_id_;
1180  hdr->subrun_id = GetSubrunForSequenceID(seqID);
1181  hdr->event_id = use_sequence_id_for_event_number_ ? static_cast<uint32_t>(seqID) : static_cast<uint32_t>(timestamp);
1182  hdr->sequence_id = seqID;
1183  hdr->timestamp = timestamp;
1184  buffer_writes_pending_[new_buffer] = 0;
1185  IncrementWritePos(new_buffer, sizeof(detail::RawEventHeader));
1186  SetMFIteration("Sequence ID " + std::to_string(seqID));
1187 
1188  TLOG(TLVL_BUFFER) << "getBufferForSequenceID placing " << new_buffer << " to active.";
1189  active_buffers_.insert(new_buffer);
1190  TLOG(TLVL_BUFFER) << "Buffer occupancy now (total,full,reading,empty,pending,active)=("
1191  << size() << ","
1192  << ReadReadyCount() << ","
1193  << WriteReadyCount(true) - WriteReadyCount(false) - ReadReadyCount() << ","
1194  << WriteReadyCount(false) << ","
1195  << pending_buffers_.size() << ","
1196  << active_buffers_.size() << ")";
1197 
1198  if (requests_)
1199  {
1200  requests_->AddRequest(seqID, timestamp);
1201  }
1202  TLOG(TLVL_DEBUG + 34) << "getBufferForSequenceID " << seqID << " returning newly initialized buffer " << new_buffer;
1203  return new_buffer;
1204 }
1205 
1206 bool artdaq::SharedMemoryEventManager::hasFragments_(int buffer)
1207 {
1208  if (buffer == -1)
1209  {
1210  return true;
1211  }
1212  if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
1213  {
1214  return true;
1215  }
1216  ResetReadPos(buffer);
1217  IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
1218  return MoreDataInBuffer(buffer);
1219 }
1220 
1221 void artdaq::SharedMemoryEventManager::complete_buffer_(int buffer)
1222 {
1223  auto hdr = getEventHeader_(buffer);
1224  if (hdr->is_complete)
1225  {
1226  TLOG(TLVL_DEBUG + 32) << "complete_buffer_: This fragment completes event " << hdr->sequence_id << ".";
1227 
1228  {
1229  TLOG(TLVL_BUFFER) << "complete_buffer_ moving " << buffer << " from active to pending.";
1230 
1231  TLOG(TLVL_BUFLCK) << "complete_buffer_: obtaining sequence_id_mutex lock for seqid=" << hdr->sequence_id;
1232  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
1233  TLOG(TLVL_BUFLCK) << "complete_buffer_: obtained sequence_id_mutex lock for seqid=" << hdr->sequence_id;
1234  active_buffers_.erase(buffer);
1235  pending_buffers_.insert(buffer);
1236  released_events_.insert(hdr->sequence_id);
1237  while (released_events_.size() > max_event_list_length_)
1238  {
1239  released_events_.erase(released_events_.begin());
1240  }
1241 
1242  TLOG(TLVL_BUFFER) << "Buffer occupancy now (total,full,reading,empty,pending,active)=("
1243  << size() << ","
1244  << ReadReadyCount() << ","
1245  << WriteReadyCount(true) - WriteReadyCount(false) - ReadReadyCount() << ","
1246  << WriteReadyCount(false) << ","
1247  << pending_buffers_.size() << ","
1248  << active_buffers_.size() << ")";
1249  }
1250  if (requests_)
1251  {
1252  requests_->RemoveRequest(hdr->sequence_id);
1253  }
1254  }
1255  CheckPendingBuffers();
1256 }
1257 
1258 bool artdaq::SharedMemoryEventManager::bufferComparator(int bufA, int bufB)
1259 {
1260  return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
1261 }
1262 
1263 void artdaq::SharedMemoryEventManager::CheckPendingBuffers()
1264 {
1265  TLOG(TLVL_BUFLCK) << "CheckPendingBuffers: Obtaining sequence_id_mutex_";
1266  std::unique_lock<std::mutex> lk(sequence_id_mutex_);
1267  TLOG(TLVL_BUFLCK) << "CheckPendingBuffers: Obtained sequence_id_mutex_";
1268  check_pending_buffers_(lk);
1269 }
1270 
1271 void artdaq::SharedMemoryEventManager::check_pending_buffers_(std::unique_lock<std::mutex> const& lock)
1272 {
1273  TLOG(TLVL_DEBUG + 34) << "check_pending_buffers_ BEGIN Locked=" << std::boolalpha << lock.owns_lock();
1274 
1275  auto buffers = GetBuffersOwnedByManager();
1276  for (auto buf : buffers)
1277  {
1278  if (ResetBuffer(buf) && (pending_buffers_.count(buf) == 0u))
1279  {
1280  TLOG(TLVL_DEBUG + 36) << "check_pending_buffers_ Incomplete buffer detected, buf=" << buf << " active_buffers_.count(buf)=" << active_buffers_.count(buf) << " buffer_writes_pending_[buf]=" << buffer_writes_pending_[buf].load();
1281  auto hdr = getEventHeader_(buf);
1282  if ((active_buffers_.count(buf) != 0u) && buffer_writes_pending_[buf].load() == 0)
1283  {
1284  if (requests_)
1285  {
1286  requests_->RemoveRequest(hdr->sequence_id);
1287  }
1288  TLOG(TLVL_BUFFER) << "check_pending_buffers_ moving buffer " << buf << " from active to pending";
1289  active_buffers_.erase(buf);
1290  pending_buffers_.insert(buf);
1291  TLOG(TLVL_BUFFER) << "Buffer occupancy now (total,full,reading,empty,pending,active)=("
1292  << size() << ","
1293  << ReadReadyCount() << ","
1294  << WriteReadyCount(true) - WriteReadyCount(false) - ReadReadyCount() << ","
1295  << WriteReadyCount(false) << ","
1296  << pending_buffers_.size() << ","
1297  << active_buffers_.size() << ")";
1298 
1299  run_incomplete_event_count_++;
1300  if (metricMan)
1301  {
1302  metricMan->sendMetric("Incomplete Event Rate", 1, "events/s", 3, MetricMode::Rate);
1303  }
1304  if (released_incomplete_events_.count(hdr->sequence_id) == 0u)
1305  {
1306  released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
1307  }
1308  else
1309  {
1310  released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
1311  }
1312 
1313  TLOG(TLVL_WARNING) << "Event " << hdr->sequence_id
1314  << " was opened " << TimeUtils::GetElapsedTime(event_timing_[buf]) << " s ago"
1315  << " and has timed out (missing " << released_incomplete_events_[hdr->sequence_id] << " Fragments)."
1316  << "Scheduling release to art.";
1317  }
1318  }
1319  }
1320 
1321  std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
1322  sorted_buffers.sort([this](int a, int b) { return bufferComparator(a, b); });
1323 
1324  auto counter = 0;
1325  double eventSize = 0;
1326  double eventTime = 0;
1327  for (auto buf : sorted_buffers)
1328  {
1329  auto hdr = getEventHeader_(buf);
1330  auto thisEventSize = BufferDataSize(buf);
1331 
1332  TLOG(TLVL_DEBUG + 32) << "Releasing event " << std::to_string(hdr->sequence_id) << " in buffer " << buf << " to art, "
1333  << "event_size=" << thisEventSize << ", buffer_size=" << BufferSize();
1334  statsHelper_.addSample(EVENTS_RELEASED_STAT_KEY, thisEventSize);
1335 
1336  TLOG(TLVL_BUFFER) << "check_pending_buffers_ removing buffer " << buf << " moving from pending to full";
1337  MarkBufferFull(buf);
1338  run_event_count_++;
1339  counter++;
1340  eventSize += thisEventSize;
1341  eventTime += TimeUtils::GetElapsedTime(event_timing_[buf]);
1342  pending_buffers_.erase(buf);
1343  TLOG(TLVL_BUFFER) << "Buffer occupancy now (total,full,reading,empty,pending,active)=("
1344  << size() << ","
1345  << ReadReadyCount() << ","
1346  << WriteReadyCount(true) - WriteReadyCount(false) - ReadReadyCount() << ","
1347  << WriteReadyCount(false) << ","
1348  << pending_buffers_.size() << ","
1349  << active_buffers_.size() << ")";
1350  }
1351 
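 // Routing-token bookkeeping: advertise newly freed buffers to the RoutingManager by sending one
 // token per writable buffer beyond the tokens already outstanding (tokens sent minus events
 // released this run).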
1352  if (tokens_ && tokens_->RoutingTokenSendsEnabled())
1353  {
1354  TLOG(TLVL_DEBUG + 33) << "Sent tokens: " << tokens_->GetSentTokenCount() << ", Event count: " << run_event_count_;
1355  auto outstanding_tokens = tokens_->GetSentTokenCount() - run_event_count_;
1356  auto available_buffers = WriteReadyCount(overwrite_mode_);
1357 
1358  TLOG(TLVL_DEBUG + 33) << "check_pending_buffers_: outstanding_tokens: " << outstanding_tokens << ", available_buffers: " << available_buffers
1359  << ", tokens_to_send: " << available_buffers - outstanding_tokens;
1360 
1361  if (available_buffers > outstanding_tokens)
1362  {
1363  auto tokens_to_send = available_buffers - outstanding_tokens;
1364 
1365  while (tokens_to_send > 0)
1366  {
1367  TLOG(35) << "check_pending_buffers_: Sending a Routing Token";
1368  tokens_->SendRoutingToken(1, run_id_);
1369  tokens_to_send--;
1370  }
1371  }
1372  }
1373 
1374  if (statsHelper_.readyToReport())
1375  {
1376  std::string statString = buildStatisticsString_();
1377  TLOG(TLVL_INFO) << statString;
1378  }
1379 
1380  if (metricMan)
1381  {
1382  TLOG(TLVL_DEBUG + 34) << "check_pending_buffers_: Sending Metrics";
1383  metricMan->sendMetric("Event Rate", counter, "Events", 1, MetricMode::Rate);
1384  metricMan->sendMetric("Data Rate", eventSize, "Bytes", 1, MetricMode::Rate);
1385  if (counter > 0)
1386  {
1387  metricMan->sendMetric("Average Event Size", eventSize / counter, "Bytes", 1, MetricMode::Average);
1388  metricMan->sendMetric("Average Event Building Time", eventTime / counter, "s", 1, MetricMode::Average);
1389  }
1390 
1391  metricMan->sendMetric("Events Released to art this run", run_event_count_, "Events", 1, MetricMode::LastPoint);
1392  metricMan->sendMetric("Incomplete Events Released to art this run", run_incomplete_event_count_, "Events", 1, MetricMode::LastPoint);
1393  if (tokens_ && tokens_->RoutingTokenSendsEnabled())
1394  {
1395  metricMan->sendMetric("Tokens sent", tokens_->GetSentTokenCount(), "Tokens", 2, MetricMode::LastPoint);
1396  }
1397 
1398  auto bufferReport = GetBufferReport();
1399  int full = 0, empty = 0, writing = 0, reading = 0;
1400  for (auto& buf : bufferReport)
1401  {
1402  switch (buf.second)
1403  {
1404  case BufferSemaphoreFlags::Full:
1405  full++;
1406  break;
1407  case BufferSemaphoreFlags::Empty:
1408  empty++;
1409  break;
1410  case BufferSemaphoreFlags::Writing:
1411  writing++;
1412  break;
1413  case BufferSemaphoreFlags::Reading:
1414  reading++;
1415  break;
1416  }
1417  }
1418  auto total = size();
1419  TLOG(TLVL_DEBUG + 36) << "Buffer usage: full=" << full << ", empty=" << empty << ", writing=" << writing << ", reading=" << reading << ", total=" << total;
1420 
1421  metricMan->sendMetric("Shared Memory Full Buffers", full, "buffers", 2, MetricMode::LastPoint);
1422  metricMan->sendMetric("Shared Memory Available Buffers", empty, "buffers", 2, MetricMode::LastPoint);
1423  metricMan->sendMetric("Shared Memory Pending Buffers", writing, "buffers", 2, MetricMode::LastPoint);
1424  metricMan->sendMetric("Shared Memory Reading Buffers", reading, "buffers", 2, MetricMode::LastPoint);
1425  if (total > 0)
1426  {
1427  metricMan->sendMetric("Shared Memory Full %", full * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
1428  metricMan->sendMetric("Shared Memory Available %", empty * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
1429  }
1430  }
1431  TLOG(TLVL_DEBUG + 34) << "check_pending_buffers_ END";
1432 }
1433 
1434 std::vector<char*> artdaq::SharedMemoryEventManager::parse_art_command_line_(const std::shared_ptr<art_config_file>& config_file, size_t process_index)
1435 {
1436  auto offset_index = process_index + art_process_index_offset_;
1437  TLOG(TLVL_DEBUG + 37) << "parse_art_command_line_: Parsing command line " << art_cmdline_ << ", config_file: " << config_file->getFileName() << ", index: " << process_index << " (w/offset: " << offset_index << ")";
1438  std::string art_cmdline_tmp = art_cmdline_;
1439  auto filenameit = art_cmdline_tmp.find("#CONFIG_FILE#");
1440  if (filenameit != std::string::npos)
1441  {
1442  art_cmdline_tmp.replace(filenameit, 13, config_file->getFileName());
1443  }
1444  auto indexit = art_cmdline_tmp.find("#PROCESS_INDEX#");
1445  if (indexit != std::string::npos)
1446  {
1447  art_cmdline_tmp.replace(indexit, 15, std::to_string(offset_index));
1448  }
1449  TLOG(TLVL_DEBUG + 37) << "parse_art_command_line_: After replacing index and config parameters, command line is " << art_cmdline_tmp;
1450 
1451  std::istringstream iss(art_cmdline_tmp);
1452  auto tokens = std::vector<std::string>{std::istream_iterator<std::string>{iss}, std::istream_iterator<std::string>{}};
1453  std::vector<char*> output;
1454 
1455  for (auto& token : tokens)
1456  {
1457  TLOG(TLVL_DEBUG + 37) << "parse_art_command_line_: Adding cmdline token " << token << " to output list";
1458  output.emplace_back(new char[token.length() + 1]);
1459  memcpy(output.back(), token.c_str(), token.length());
1460  output.back()[token.length()] = '\0'; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1461  }
1462  output.emplace_back(nullptr);
1463 
1464  return output;
1465 }
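
Editor's note: the vector returned by parse_art_command_line_ is a heap-allocated, null-terminated argv array in the form execvp expects. The sketch below is illustrative only and is not the RunArt implementation in this file; run_art_child_ and its error handling are hypothetical. It shows how such an array is typically consumed after fork and then released with delete[]:

// Illustrative sketch: consume a null-terminated argv vector like the one built above.
#include <sys/wait.h>  // waitpid
#include <unistd.h>    // fork, execvp, _exit
#include <vector>

static int run_art_child_(std::vector<char*>& argv)
{
    pid_t pid = fork();
    if (pid < 0) return -1;  // fork failed
    if (pid == 0)
    {
        execvp(argv[0], &argv[0]);  // argv must end with a nullptr entry
        _exit(127);                 // only reached if exec fails
    }
    int status = 0;
    waitpid(pid, &status, 0);

    // The caller owns the strings allocated with new char[]; release them.
    for (auto* arg : argv)
    {
        delete[] arg;  // deleting the nullptr terminator is a no-op
    }
    return status;
}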
1466 
1467 void artdaq::SharedMemoryEventManager::send_init_frags_()
1468 {
1469  if (init_fragments_.size() >= init_fragment_count_ && init_fragment_count_ > 0)
1470  {
1471  TLOG(TLVL_INFO) << "Broadcasting " << init_fragments_.size() << " Init Fragment(s) to all art subprocesses...";
1472 
1473 #if 0
1474  std::string fileName = "receiveInitMessage_" + std::to_string(my_rank) + ".bin";
1475  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
1476  ostream.write(reinterpret_cast<char*>(init_fragments_.front()->dataBeginBytes()), init_fragments_.front()->dataSizeBytes());
1477  ostream.close();
1478 #endif
1479 
1480  broadcastFragments_(init_fragments_);
1481  TLOG(TLVL_DEBUG + 33) << "Init Fragment sent";
1482  }
1483  else if (init_fragment_count_ > 0 && init_fragments_.size() == 0)
1484  {
1485  TLOG(TLVL_WARNING) << "Cannot send Init Fragment(s) because I haven't yet received them! Set send_init_fragments to false or init_fragment_count to 0 if this process does not receive serialized art events to avoid potentially lengthy timeouts!";
1486  }
1487  else if (init_fragment_count_ > 0)
1488  {
1489  TLOG(TLVL_INFO) << "Cannot send Init Fragment(s) because I haven't yet received them (have " << init_fragments_.size() << " of " << init_fragment_count_ << ")!";
1490  }
1491  else
1492  {
1493  // Send an empty Init Fragment so that ArtdaqInput knows that this is a pure-Fragment input
1494  artdaq::FragmentPtrs begin_run_fragments_;
1495  begin_run_fragments_.emplace_back(new artdaq::Fragment());
1496  begin_run_fragments_.back()->setSystemType(artdaq::Fragment::InitFragmentType);
1497  broadcastFragments_(begin_run_fragments_);
1498  }
1499 }
1500 
1501 void artdaq::SharedMemoryEventManager::AddInitFragment(FragmentPtr& frag)
1502 {
1503  static std::mutex init_fragment_mutex;
1504  std::lock_guard<std::mutex> lk(init_fragment_mutex);
1505  if (received_init_frags_.count(frag->fragmentID()) == 0)
1506  {
1507  TLOG(TLVL_DEBUG + 32) << "Received Init Fragment from rank " << frag->fragmentID() << ". Now have " << init_fragments_.size() + 1 << " of " << init_fragment_count_;
1508  received_init_frags_.insert(frag->fragmentID());
1509  init_fragments_.push_back(std::move(frag));
1510 
1511  // Don't send until all init fragments have been received
1512  if (init_fragments_.size() >= init_fragment_count_)
1513  {
1514  send_init_frags_();
1515  }
1516  }
1517  else
1518  {
1519  TLOG(TLVL_DEBUG + 33) << "Ignoring duplicate Init Fragment from rank " << frag->fragmentID();
1520  }
1521 }
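
Editor's note: AddInitFragment deduplicates on fragmentID, takes ownership of the caller's FragmentPtr via std::move (the caller's pointer is empty afterwards), and triggers the broadcast once the expected number of unique Init Fragments has arrived. A standalone sketch of that collect-then-trigger pattern, using a hypothetical class and names purely to isolate the idea:

// Hypothetical sketch of the dedup-then-trigger pattern used above: remember which
// senders have contributed and fire the callback once the expected count is reached.
#include <cstddef>
#include <functional>
#include <mutex>
#include <set>
#include <utility>

class InitCollector
{
public:
    InitCollector(size_t expected, std::function<void()> on_complete)
        : expected_(expected), on_complete_(std::move(on_complete)) {}

    void add(int sender_id)
    {
        std::lock_guard<std::mutex> lk(mutex_);
        if (!seen_.insert(sender_id).second) return;  // duplicate sender: ignore
        if (!fired_ && seen_.size() >= expected_)
        {
            fired_ = true;
            on_complete_();  // e.g. broadcast the collected Init Fragments
        }
    }

private:
    size_t expected_;
    std::function<void()> on_complete_;
    std::set<int> seen_;
    bool fired_{false};
    std::mutex mutex_;
};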
1522 
1523 void artdaq::SharedMemoryEventManager::UpdateArtConfiguration(fhicl::ParameterSet art_pset)
1524 {
1525  TLOG(TLVL_DEBUG + 32) << "UpdateArtConfiguration BEGIN";
1526  if (art_pset != current_art_pset_ || !current_art_config_file_)
1527  {
1528  current_art_pset_ = art_pset;
1529  if (manual_art_)
1530  current_art_config_file_ = std::make_shared<art_config_file>(art_pset, GetKey(), GetBroadcastKey());
1531  else
1532  current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
1533  }
1534  TLOG(TLVL_DEBUG + 32) << "UpdateArtConfiguration END";
1535 }
1536 
1537 std::string artdaq::SharedMemoryEventManager::buildStatisticsString_() const
1538 {
1539  std::ostringstream oss;
1540  oss << app_name << " statistics:" << std::endl;
1541 
1542  artdaq::MonitoredQuantityPtr mqPtr =
1543  artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(EVENTS_RELEASED_STAT_KEY);
1544  if (mqPtr.get() != nullptr)
1545  {
1546  artdaq::MonitoredQuantityStats stats;
1547  mqPtr->getStats(stats);
1548  oss << " Event statistics: " << stats.recentSampleCount << " events released at " << stats.recentSampleRate
1549  << " events/sec, effective data rate = "
1550  << (stats.recentValueRate / 1024.0 / 1024.0)
1551  << " MB/sec, monitor window = " << stats.recentDuration
1552  << " sec, min::max event size = " << (stats.recentValueMin / 1024.0 / 1024.0)
1553  << "::" << (stats.recentValueMax / 1024.0 / 1024.0) << " MB" << std::endl;
1554  if (stats.recentSampleRate > 0.0)
1555  {
1556  oss << " Average time per event: ";
1557  oss << " elapsed time = " << (1.0 / stats.recentSampleRate) << " sec" << std::endl;
1558  }
1559  }
1560 
1561  mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_RECEIVED_STAT_KEY);
1562  if (mqPtr.get() != nullptr)
1563  {
1564  artdaq::MonitoredQuantityStats stats;
1565  mqPtr->getStats(stats);
1566  oss << " Fragment statistics: " << stats.recentSampleCount << " fragments received at " << stats.recentSampleRate
1567  << " fragments/sec, effective data rate = "
1568  << (stats.recentValueRate / 1024.0 / 1024.0)
1569  << " MB/sec, monitor window = " << stats.recentDuration
1570  << " sec, min::max fragment size = " << (stats.recentValueMin / 1024.0 / 1024.0)
1571  << "::" << (stats.recentValueMax / 1024.0 / 1024.0) << " MB" << std::endl;
1572  }
1573 
1574  oss << " Event counts: Run -- " << run_event_count_ << " Total, " << run_incomplete_event_count_ << " Incomplete."
1575  << " Subrun -- " << subrun_event_count_ << " Total, " << subrun_incomplete_event_count_ << " Incomplete. "
1576  << std::endl;
1577  return oss.str();
1578 }
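
Editor's note: the derived figures in the statistics string follow directly from the MonitoredQuantity fields: average time per event is the reciprocal of the recent sample rate, and byte rates and sizes are converted to MB by dividing by 1024 twice. A short worked example with hypothetical numbers:

// Worked example (hypothetical values) of the derived quantities reported above.
#include <iostream>

int main()
{
    double recentSampleRate = 250.0;      // events/sec over the monitor window
    double recentValueRate = 52428800.0;  // bytes/sec over the same window

    double avg_time_per_event = 1.0 / recentSampleRate;       // 0.004 sec/event
    double data_rate_mb = recentValueRate / 1024.0 / 1024.0;  // 50 MB/sec

    std::cout << "avg time/event = " << avg_time_per_event << " sec, "
              << "data rate = " << data_rate_mb << " MB/sec\n";
    return 0;
}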