1 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
2 #include "artdaq-core/Core/StatisticsCollection.hh"
3 #include "artdaq-core/Utilities/TraceLock.hh"
5 #include "SharedMemoryEventManager.hh"
8 : SharedMemoryManager(pset.get<uint32_t>(
"shared_memory_key", 0xBEE70000 + getpid()),
9 pset.get<size_t>(
"buffer_count"),
10 pset.has_key(
"max_event_size_bytes") ? pset.get<size_t>(
"max_event_size_bytes") : pset.get<size_t>(
"expected_fragments_per_event") * pset.get<size_t>(
"max_fragment_size_bytes"),
11 pset.get<size_t>(
"stale_buffer_timeout_usec", pset.get<size_t>(
"event_queue_wait_time", 5) * 1000000),
12 !pset.get<bool>(
"broadcast_mode", false))
13 , num_art_processes_(pset.get<size_t>(
"art_analyzer_count", 1))
14 , num_fragments_per_event_(pset.get<size_t>(
"expected_fragments_per_event"))
15 , queue_size_(pset.get<size_t>(
"buffer_count"))
18 , update_run_ids_(pset.get<bool>(
"update_run_ids_on_new_fragment", true))
19 , overwrite_mode_(!pset.get<bool>(
"use_art", true) || pset.get<bool>(
"overwrite_mode", false) || pset.get<bool>(
"broadcast_mode", false))
20 , send_init_fragments_(pset.get<bool>(
"send_init_fragments", true))
21 , buffer_writes_pending_()
22 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
23 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
24 , broadcast_timeout_ms_(pset.get<int>(
"fragment_broadcast_timeout_ms", 3000))
26 , subrun_event_count_(0)
29 , current_art_pset_(art_pset)
31 , broadcasts_(pset.get<uint32_t>(
"broadcast_shared_memory_key", 0xCEE70000 + getpid()),
32 pset.get<size_t>(
"broadcast_buffer_count", 10),
33 pset.get<size_t>(
"broadcast_buffer_size", 0x100000),
34 pset.get<int>(
"fragment_broadcast_timeout_ms", 3000) * 1000, false)
36 SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
37 broadcasts_.SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
39 if (pset.get<
bool>(
"use_art",
true) ==
false) {
40 TLOG_INFO(
"SharedMemoryEventManager") <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false" << TLOG_ENDL;
41 num_art_processes_ = 0;
44 TLOG_INFO(
"SharedMemoryEventManager") <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true" << TLOG_ENDL;
45 TLOG_TRACE(
"SharedMemoryEventManager") <<
"art_pset is " << art_pset.to_string() << TLOG_ENDL;
47 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
49 if (overwrite_mode_ && num_art_processes_ > 0)
51 TLOG_WARNING(
"SharedMemoryEventManager") <<
"Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!" << TLOG_ENDL;
53 else if (overwrite_mode_)
55 TLOG_INFO(
"SharedMemoryEventManager") <<
"Overwrite Mode enabled, no configured art processes at startup" << TLOG_ENDL;
58 for (
size_t ii = 0; ii < size(); ++ii)
60 buffer_writes_pending_[ii] = 0;
63 if (!IsValid())
throw cet::exception(
"SharedMemoryEventManager") <<
"Unable to attach to Shared Memory!";
65 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Setting Writer rank to " << my_rank << TLOG_ENDL;
67 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"Writer Rank is " << GetRank() << TLOG_ENDL;
70 TLOG_TRACE(
"SharedMemoryEventManager") <<
"END CONSTRUCTOR" << TLOG_ENDL;
75 TLOG_TRACE(
"SharedMemoryEventManager") <<
"DESTRUCTOR" << TLOG_ENDL;
77 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Destructor END" << TLOG_ENDL;
80 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag,
void* dataPtr)
82 TLOG_TRACE(
"SharedMemoryEventManager") <<
"AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
83 <<
", sequence_id=" << std::to_string(frag.sequence_id) << TLOG_ENDL;
84 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
85 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Using buffer " << std::to_string(buffer) << TLOG_ENDL;
86 if (buffer == -1)
return false;
89 TLOG_ERROR(
"SharedMemoryEventManager") <<
"Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
93 auto hdr = getEventHeader_(buffer);
96 hdr->run_id = run_id_;
97 hdr->subrun_id = subrun_id_;
100 TLOG_TRACE(
"SharedMemoryEventManager") <<
"AddFragment before Write calls" << TLOG_ENDL;
101 Write(buffer, dataPtr, frag.word_count *
sizeof(RawDataType));
103 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Checking for complete event" << TLOG_ENDL;
104 auto fragmentCount = GetFragmentCount(frag.sequence_id);
105 hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
106 TLOG_TRACE(
"SharedMemoryEventManager") <<
"hdr->is_complete=" << std::boolalpha << hdr->is_complete
107 <<
", fragmentCount=" << std::to_string(fragmentCount)
108 <<
", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
109 <<
", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]) << TLOG_ENDL;
111 complete_buffer_(buffer);
112 requests_.SendRequest(
true);
114 TLOG_TRACE(
"SharedMemoryEventManager") <<
"AddFragment END" << TLOG_ENDL;
118 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag,
size_t timeout_usec, FragmentPtr& outfrag)
120 TLOG_TRACE(
"SharedMemoryEventManager") <<
"AddFragment(FragmentPtr) BEGIN" << TLOG_ENDL;
121 auto hdr = *
reinterpret_cast<detail::RawFragmentHeader*
>(frag->headerAddress());
122 auto data = frag->headerAddress();
123 auto start = std::chrono::steady_clock::now();
125 while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
127 sts = AddFragment(hdr, data);
128 if (!sts) usleep(1000);
132 outfrag = std::move(frag);
134 TLOG_TRACE(
"SharedMemoryEventManager") <<
"AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts << TLOG_ENDL;
140 TLOG_ARB(14,
"SharedMemoryEventManager") <<
"WriteFragmentHeader BEGIN" << TLOG_ENDL;
141 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
145 if (buffer == -1 && !dropIfNoBuffersAvailable)
return nullptr;
148 TLOG_ERROR(
"SharedMemoryEventManager") <<
"Dropping fragment because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
152 TLOG_ERROR(
"SharedMemoryEventManager") <<
"Dropping fragment because there is no room in the queue and reliable mode is off: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
153 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
155 dropped_data_.reset(
new Fragment(frag.word_count - frag.num_words()));
156 return dropped_data_->dataBegin();
159 buffer_writes_pending_[buffer]++;
160 TraceLock lk(buffer_mutexes_[buffer], 50,
"WriteFragmentHeader");
161 Write(buffer, &frag, frag.num_words() *
sizeof(RawDataType));
163 auto pos =
reinterpret_cast<RawDataType*
>(GetWritePos(buffer));
164 if (frag.word_count - frag.num_words() > 0) {
165 IncrementWritePos(buffer, (frag.word_count - frag.num_words()) *
sizeof(RawDataType));
168 TLOG_ARB(14,
"SharedMemoryEventManager") <<
"WriteFragmentHeader END" << TLOG_ENDL;
175 TLOG_TRACE(
"SharedMemoryEventManager") <<
"DoneWritingFragment BEGIN" << TLOG_ENDL;
176 auto buffer = getBufferForSequenceID_(frag.sequence_id,
false, frag.timestamp);
177 if (buffer == -1) Detach(
true,
"SharedMemoryEventManager",
"getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
178 if (buffer == -2)
return;
180 auto hdr = getEventHeader_(buffer);
183 hdr->run_id = run_id_;
184 hdr->subrun_id = subrun_id_;
187 buffer_writes_pending_[buffer]--;
188 if (buffer_writes_pending_[buffer] != 0)
190 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Done writing fragment, but there's another writer. Not doing bookkeeping steps." << TLOG_ENDL;
193 auto frag_count = GetFragmentCount(frag.sequence_id);
194 hdr->is_complete = frag_count == num_fragments_per_event_;
195 #if ART_SUPPORTS_DUPLICATE_EVENTS
196 if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
197 hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
201 complete_buffer_(buffer);
202 requests_.SendRequest(
true);
203 TLOG_TRACE(
"SharedMemoryEventManager") <<
"DoneWritingFragment END" << TLOG_ENDL;
208 return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID,
false), type);
213 if (buffer == -1)
return 0;
214 ResetReadPos(buffer);
215 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
219 while (MoreDataInBuffer(buffer))
221 auto fragHdr =
reinterpret_cast<artdaq::detail::RawFragmentHeader*
>(GetReadPos(buffer));
222 IncrementReadPos(buffer, fragHdr->word_count *
sizeof(RawDataType));
223 if (type != Fragment::InvalidFragmentType && fragHdr->type != type)
continue;
224 TLOG_TRACE(
"GetFragmentCount") <<
"Adding Fragment with size=" << std::to_string(fragHdr->word_count) <<
" to Fragment count" << TLOG_ENDL;
236 TLOG_INFO(
"SharedMemoryEventManager") <<
"Starting art process with config file " << config_file->getFileName() << TLOG_ENDL;
237 std::vector<char*> args{ (
char*)
"art", (
char*)
"-c", &config_file->getFileName()[0], NULL };
242 execvp(
"art", &args[0]);
247 TLOG_INFO(
"SharedMemoryEventManager") <<
"PID of new art process is " << pid << TLOG_ENDL;
248 art_processes_.insert(pid);
250 waitpid(pid, &status, 0);
251 TLOG_INFO(
"SharedMemoryEventManager") <<
"Removing PID " << pid <<
" from process list" << TLOG_ENDL;
252 art_processes_.erase(pid);
255 TLOG_INFO(
"SharedMemoryEventManager") <<
"art process " << pid <<
" exited normally, " << (restart_art_ ?
"restarting" :
"not restarting") << TLOG_ENDL;
259 TLOG_WARNING(
"SharedMemoryEventManager") <<
"art process " << pid <<
" exited with status code 0x" << std::hex << status <<
" (" << std::dec << status <<
"), " << (restart_art_ ?
"restarting" :
"not restarting") << TLOG_ENDL;
267 if (num_art_processes_ == 0)
return;
268 for (
size_t ii = 0; ii < num_art_processes_; ++ii)
270 StartArtProcess(current_art_pset_);
276 static std::mutex start_art_mutex;
277 TraceLock lk(start_art_mutex, 15,
"StartArtLock");
279 auto initialCount = GetAttachedCount();
280 auto startTime = std::chrono::steady_clock::now();
282 if (pset != current_art_pset_)
284 current_art_pset_ = pset;
285 current_art_config_file_ = std::make_shared<art_config_file>(pset);
288 boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
292 while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
293 && TimeUtils::GetElapsedTime(startTime) < 5)
297 if (GetAttachedCount() - initialCount < 1 || pid <= 0)
299 TLOG_WARNING(
"SharedMemoryEventManager") <<
"art process has not started after 5s. Check art configuration!"
300 <<
" (pid=" << pid <<
", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) <<
")" << TLOG_ENDL;
305 TLOG_INFO(
"SharedMemoryEventManager") << std::setw(4) << std::fixed <<
"art initialization took "
306 << TimeUtils::GetElapsedTime(startTime) <<
" seconds." << TLOG_ENDL;
315 restart_art_ =
false;
316 current_art_config_file_ =
nullptr;
317 current_art_pset_ = fhicl::ParameterSet();
319 for (
auto pid : pids)
321 if (kill(pid, 0) >= 0)
326 if (pids.size() == 0)
328 TLOG_ARB(14,
"SharedMemoryEventManager") <<
"All art processes already exited, nothing to do." << TLOG_ENDL;
333 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Gently informing art processes that it is time to shut down" << TLOG_ENDL;
334 for (
auto pid : pids)
339 int graceful_wait_ms = 1000;
340 int int_wait_ms = 100;
342 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Waiting up to " << graceful_wait_ms <<
" ms for all art processes to exit gracefully" << TLOG_ENDL;
343 for (
int ii = 0; ii < graceful_wait_ms; ++ii)
347 for (
auto pid : pids)
349 if (kill(pid, 0) < 0)
354 if (pids.size() == 0)
356 TLOG_TRACE(
"SharedMemoryEventManager") <<
"All art processes exited after " << ii <<
" ms." << TLOG_ENDL;
361 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Insisting that the art processes shut down" << TLOG_ENDL;
362 for (
auto pid : pids)
367 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Waiting up to " << int_wait_ms <<
" ms for all art processes to exit" << TLOG_ENDL;
368 for (
int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
372 for (
auto pid : pids)
374 if (kill(pid, 0) < 0)
380 if (pids.size() == 0)
382 TLOG_TRACE(
"SharedMemoryEventManager") <<
"All art processes exited after " << ii <<
" ms." << TLOG_ENDL;
387 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Killing remaning art processes with extreme prejudice" << TLOG_ENDL;
388 while (pids.size() > 0)
390 kill(*pids.begin(), SIGKILL);
396 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"ReconfigureArt BEGIN" << TLOG_ENDL;
401 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
403 broadcasts_.MarkBufferEmpty(ii,
true);
405 if (newRun == 0) newRun = run_id_ + 1;
406 current_art_pset_ = art_pset;
407 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
409 if (n_art_processes != -1)
411 TLOG_INFO(
"SharedMemoryEventManager") <<
"Setting number of art processes to " << n_art_processes << TLOG_ENDL;
412 num_art_processes_ = n_art_processes;
415 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"ReconfigureArt END" << TLOG_ENDL;
420 init_fragment_.reset(
nullptr);
421 TLOG_TRACE(
"SharedMemoryEventManager") <<
"SharedMemoryEventManager::endOfData" << TLOG_ENDL;
422 restart_art_ =
false;
424 size_t initialStoreSize = GetIncompleteEventCount();
425 TLOG_TRACE(
"SharedMemoryEventManager") <<
"endOfData: Flushing " << initialStoreSize
426 <<
" stale events from the SharedMemoryEventManager." << TLOG_ENDL;
427 int counter = initialStoreSize;
428 while (active_buffers_.size() > 0 && counter > 0)
430 complete_buffer_(*active_buffers_.begin());
433 TLOG_TRACE(
"SharedMemoryEventManager") <<
"endOfData: Done flushing, there are now " << GetIncompleteEventCount()
434 <<
" stale events in the SharedMemoryEventManager." << TLOG_ENDL;
437 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) <<
" outstanding buffers..." << TLOG_ENDL;
438 auto start = std::chrono::steady_clock::now();
439 auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
442 while (lastReadCount > 0 && TimeUtils::GetElapsedTime(start) < 1)
444 auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
445 if (temp != lastReadCount)
447 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Waiting for " << std::to_string(temp) <<
" outstanding buffers..." << TLOG_ENDL;
448 lastReadCount = temp;
449 start = std::chrono::steady_clock::now();
451 if (lastReadCount > 0) usleep(1000);
454 TLOG_TRACE(
"SharedMemoryEventManager") <<
"endOfData: Broadcasting EndOfData Fragment" << TLOG_ENDL;
455 FragmentPtr outFrag = std::move(Fragment::eodFrag(GetBufferCount()));
456 bool success = broadcastFragment_(std::move(outFrag), outFrag);
459 TLOG_TRACE(
"SharedMemoryEventManager") <<
"endOfData: Clearing buffers to make room for EndOfData Fragment" << TLOG_ENDL;
460 for (
size_t ii = 0; ii < size(); ++ii)
462 broadcasts_.MarkBufferEmpty(ii,
true);
464 broadcastFragment_(std::move(outFrag), outFrag);
467 while (art_processes_.size() > 0)
469 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"Waiting for all art processes to exit, there are " << std::to_string(art_processes_.size()) <<
" remaining." << TLOG_ENDL;
470 ShutdownArtProcesses(art_processes_);
472 ResetAttachedCount();
474 TLOG_TRACE(
"SharedMemoryEventManager") <<
"endOfData: Clearing buffers" << TLOG_ENDL;
475 for (
size_t ii = 0; ii < size(); ++ii)
477 MarkBufferEmpty(ii,
true);
479 released_incomplete_events_.clear();
481 TLOG_TRACE(
"SharedMemoryEventManager") <<
"endOfData END" << TLOG_ENDL;
482 TLOG_INFO(
"SharedMemoryEventManager") <<
"EndOfData Complete. There were " << GetLastSeenBufferID() <<
" events processed in this run." << TLOG_ENDL;
488 init_fragment_.reset(
nullptr);
492 requests_.SendRoutingToken(queue_size_);
493 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"Starting run " << run_id_
494 <<
", max queue size = "
497 << GetLockedBufferCount() << TLOG_ENDL;
500 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
501 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
510 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
511 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
517 FragmentPtr endOfRunFrag(
new
518 Fragment(static_cast<size_t>
519 (ceil(
sizeof(my_rank) /
520 static_cast<double>(
sizeof(Fragment::value_type))))));
522 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
523 *endOfRunFrag->dataBegin() = my_rank;
524 broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
531 std::unique_ptr<artdaq::Fragment>
533 Fragment(static_cast<size_t>
534 (ceil(
sizeof(my_rank) /
535 static_cast<double>(
sizeof(Fragment::value_type))))));
537 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
538 *endOfSubrunFrag->dataBegin() = my_rank;
540 broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
542 TLOG_INFO(
"SharedMemoryEventManager") <<
"Subrun " << subrun_id_ <<
" in run " << run_id_ <<
" has ended. There were " << subrun_event_count_ <<
" events in this subrun." << TLOG_ENDL;
543 subrun_event_count_ = 0;
552 metricMan->sendMetric(
"Incomplete Event Count", GetIncompleteEventCount(),
"events", 1, MetricMode::LastPoint);
553 metricMan->sendMetric(
"Pending Event Count", GetPendingEventCount(),
"events", 1, MetricMode::LastPoint);
555 check_pending_buffers_();
556 if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
558 if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
561 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
562 std::ostringstream oss;
563 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
564 for (
auto& ev : active_buffers_)
566 auto hdr = getEventHeader_(ev);
567 oss << hdr->sequence_id <<
" (" << GetFragmentCount(hdr->sequence_id) <<
"), ";
569 TLOG_DEBUG(
"SharedMemoryEventManager") << oss.str() << TLOG_ENDL;
573 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
575 auto buffer = broadcasts_.GetBufferForWriting(
false);
576 auto start_time = std::chrono::steady_clock::now();
577 while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
580 buffer = broadcasts_.GetBufferForWriting(
false);
584 TLOG_ERROR(
"SharedMemoryEventManager") <<
"Broadcast of fragment type " << frag->typeString() <<
" failed due to timeout waiting for buffer!" << TLOG_ENDL;
589 auto hdr =
reinterpret_cast<detail::RawEventHeader*
>(broadcasts_.GetBufferStart(buffer));
590 hdr->run_id = run_id_;
591 hdr->subrun_id = subrun_id_;
592 hdr->sequence_id = frag->sequenceID();
593 hdr->is_complete =
true;
594 broadcasts_.IncrementWritePos(buffer,
sizeof(detail::RawEventHeader));
596 TLOG_TRACE(
"SharedMemoryEventManager") <<
"broadcastFragment_ before Write calls" << TLOG_ENDL;
597 broadcasts_.Write(buffer, frag->headerAddress(), frag->size() *
sizeof(RawDataType));
599 broadcasts_.MarkBufferFull(buffer, -1);
604 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(
int buffer)
606 return reinterpret_cast<detail::RawEventHeader*
>(GetBufferStart(buffer));
609 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID,
bool create_new, Fragment::timestamp_t timestamp)
611 check_pending_buffers_();
612 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
613 TLOG_ARB(14,
"SharedMemoryEventManager") <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" BEGIN" << TLOG_ENDL;
614 auto buffers = GetBuffersOwnedByManager();
615 for (
auto& buf : buffers)
617 auto hdr = getEventHeader_(buf);
618 if (hdr->sequence_id == seqID)
620 TLOG_ARB(14,
"SharedMemoryEventManager") <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning " << buf << TLOG_ENDL;
625 #if !ART_SUPPORTS_DUPLICATE_EVENTS
626 if (released_incomplete_events_.count(seqID)) {
627 TLOG_ERROR(
"SharedMemoryEventManager") <<
"Buffer has already been marked \"Incomplete\" and sent to art!" << TLOG_ENDL;
632 if (!create_new)
return -1;
634 int new_buffer = GetBufferForWriting(
false);
636 if (new_buffer == -1)
638 new_buffer = GetBufferForWriting(overwrite_mode_);
641 if (new_buffer == -1)
return -1;
642 TraceLock(buffer_mutexes_[new_buffer], 34,
"getBufferForSequenceID");
643 auto hdr = getEventHeader_(new_buffer);
644 hdr->is_complete =
false;
645 hdr->run_id = run_id_;
646 hdr->subrun_id = subrun_id_;
647 hdr->sequence_id = seqID;
648 buffer_writes_pending_[new_buffer] = 0;
649 IncrementWritePos(new_buffer,
sizeof(detail::RawEventHeader));
651 active_buffers_.insert(new_buffer);
653 if (timestamp != Fragment::InvalidTimestamp)
655 requests_.AddRequest(seqID, timestamp);
657 requests_.SendRequest();
658 TLOG_ARB(14,
"SharedMemoryEventManager") <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning newly initialized buffer " << new_buffer << TLOG_ENDL;
662 bool artdaq::SharedMemoryEventManager::hasFragments_(
int buffer)
664 if (buffer == -1)
return true;
665 if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
669 ResetReadPos(buffer);
670 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
671 return MoreDataInBuffer(buffer);
674 void artdaq::SharedMemoryEventManager::complete_buffer_(
int buffer)
676 auto hdr = getEventHeader_(buffer);
677 if (hdr->is_complete)
679 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) <<
"." << TLOG_ENDL;
681 requests_.RemoveRequest(hdr->sequence_id);
682 requests_.SendRoutingToken(1);
684 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
685 active_buffers_.erase(buffer);
686 pending_buffers_.insert(buffer);
689 check_pending_buffers_();
692 bool artdaq::SharedMemoryEventManager::bufferComparator(
int bufA,
int bufB)
694 return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
697 void artdaq::SharedMemoryEventManager::check_pending_buffers_()
699 TLOG_TRACE(
"SharedMemoryEventManager") <<
"check_pending_buffers_ BEGIN" << TLOG_ENDL;
701 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
702 auto buffers = GetBuffersOwnedByManager();
703 for (
auto buf : buffers)
705 if (ResetBuffer(buf) && !pending_buffers_.count(buf))
707 auto hdr = getEventHeader_(buf);
708 if (active_buffers_.count(buf))
710 TLOG_WARNING(
"SharedMemoryEventManager") <<
"Active event " << std::to_string(hdr->sequence_id) <<
" is stale. Scheduling release of incomplete event to art." << TLOG_ENDL;
711 requests_.RemoveRequest(hdr->sequence_id);
712 requests_.SendRoutingToken(1);
713 active_buffers_.erase(buf);
714 pending_buffers_.insert(buf);
715 if (!released_incomplete_events_.count(hdr->sequence_id)) {
716 released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
719 released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
726 Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
729 if (WriteReadyCount(
false) != 0)
731 for (
auto buf : active_buffers_)
733 auto hdr = getEventHeader_(buf);
734 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Buffer: " << buf <<
", SeqID: " << std::to_string(hdr->sequence_id) <<
", ACTIVE" << TLOG_ENDL;
735 if (hdr->sequence_id < lowestSeqId)
737 lowestSeqId = hdr->sequence_id;
740 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Lowest SeqID held: " << std::to_string(lowestSeqId) << TLOG_ENDL;
743 std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
744 sorted_buffers.sort([
this](
int a,
int b) {
return bufferComparator(a, b); });
745 for (
auto buf : sorted_buffers)
747 auto hdr = getEventHeader_(buf);
748 if (hdr->sequence_id > lowestSeqId)
break;
749 TLOG_DEBUG(
"SharedMemoryEventManager") <<
"Releasing event " << std::to_string(hdr->sequence_id) <<
" in buffer " << buf <<
" to art." << TLOG_ENDL;
751 subrun_event_count_++;
752 pending_buffers_.erase(buf);
756 TLOG_TRACE(
"SharedMemoryEventManager") <<
"check_pending_buffers_: Sending Metrics" << TLOG_ENDL;
759 auto full = ReadReadyCount();
760 auto empty = WriteReadyCount(overwrite_mode_);
762 metricMan->sendMetric(
"Shared Memory Full Buffers", full,
"buffers", 2, MetricMode::LastPoint);
763 metricMan->sendMetric(
"Shared Memory Available Buffers", empty,
"buffers", 2, MetricMode::LastPoint);
764 metricMan->sendMetric(
"Shared Memory Full %", full * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
765 metricMan->sendMetric(
"Shared Memory Available %", empty * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
767 TLOG_TRACE(
"SharedMemoryEventManager") <<
"check_pending_buffers_ END" << TLOG_ENDL;
770 void artdaq::SharedMemoryEventManager::send_init_frag_()
772 if (init_fragment_ !=
nullptr)
774 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Sending init Fragment to art..." << TLOG_ENDL;
777 std::string fileName =
"receiveInitMessage_" + std::to_string(my_rank) +
".bin";
778 std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
779 ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
783 broadcastFragment_(std::move(init_fragment_), init_fragment_);
784 TLOG_TRACE(
"SharedMemoryEventManager") <<
"Init Fragment sent" << TLOG_ENDL;
786 else if (send_init_fragments_)
788 TLOG_WARNING(
"SharedMemoryEventManager") <<
"Cannot send init fragment because I haven't yet received one!" << TLOG_ENDL;
794 if (!init_fragment_ || init_fragment_ ==
nullptr)
796 init_fragment_.swap(frag);
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void StartArt()
Start all the art processes.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not yet been set already.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer completeness.
bool endOfData()
Indicate that the end of input has been reached to the art processes.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.