2 #define TRACE_NAME (app_name + "_SharedMemoryEventManager").c_str()
4 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
5 #include "artdaq-core/Core/StatisticsCollection.hh"
6 #include "artdaq-core/Utilities/TraceLock.hh"
9 std::mutex artdaq::SharedMemoryEventManager::sequence_id_mutex_;
12 : SharedMemoryManager(pset.get<uint32_t>(
"shared_memory_key", 0xBEE70000 + getpid()),
13 pset.get<size_t>(
"buffer_count"),
14 pset.has_key(
"max_event_size_bytes") ? pset.get<size_t>(
"max_event_size_bytes") : pset.get<size_t>(
"expected_fragments_per_event") * pset.get<size_t>(
"max_fragment_size_bytes"),
15 pset.get<size_t>(
"stale_buffer_timeout_usec", pset.get<size_t>(
"event_queue_wait_time", 5) * 1000000),
16 !pset.get<bool>(
"broadcast_mode", false))
17 , num_art_processes_(pset.get<size_t>(
"art_analyzer_count", 1))
18 , num_fragments_per_event_(pset.get<size_t>(
"expected_fragments_per_event"))
19 , queue_size_(pset.get<size_t>(
"buffer_count"))
22 , update_run_ids_(pset.get<bool>(
"update_run_ids_on_new_fragment", true))
23 , overwrite_mode_(!pset.get<bool>(
"use_art", true) || pset.get<bool>(
"overwrite_mode", false) || pset.get<bool>(
"broadcast_mode", false))
24 , send_init_fragments_(pset.get<bool>(
"send_init_fragments", true))
25 , buffer_writes_pending_()
26 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
27 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
28 , broadcast_timeout_ms_(pset.get<int>(
"fragment_broadcast_timeout_ms", 3000))
30 , subrun_event_count_(0)
33 , current_art_pset_(art_pset)
35 , broadcasts_(pset.get<uint32_t>(
"broadcast_shared_memory_key", 0xCEE70000 + getpid()),
36 pset.get<size_t>(
"broadcast_buffer_count", 10),
37 pset.get<size_t>(
"broadcast_buffer_size", 0x100000),
38 pset.get<int>(
"fragment_broadcast_timeout_ms", 3000) * 1000, false)
40 SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
41 broadcasts_.SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
43 if (pset.get<
bool>(
"use_art",
true) ==
false) {
44 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false" << TLOG_ENDL;
45 num_art_processes_ = 0;
48 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true" << TLOG_ENDL;
49 TLOG(TLVL_TRACE) <<
"art_pset is " << art_pset.to_string() << TLOG_ENDL;
51 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
53 if (overwrite_mode_ && num_art_processes_ > 0)
55 TLOG(TLVL_WARNING) <<
"Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!" << TLOG_ENDL;
57 else if (overwrite_mode_)
59 TLOG(TLVL_INFO) <<
"Overwrite Mode enabled, no configured art processes at startup" << TLOG_ENDL;
62 for (
size_t ii = 0; ii < size(); ++ii)
64 buffer_writes_pending_[ii] = 0;
67 if (!IsValid())
throw cet::exception(app_name +
"_SharedMemoryEventManager") <<
"Unable to attach to Shared Memory!";
69 TLOG(TLVL_TRACE) <<
"Setting Writer rank to " << my_rank << TLOG_ENDL;
71 TLOG(TLVL_DEBUG) <<
"Writer Rank is " << GetRank() << TLOG_ENDL;
74 TLOG(TLVL_TRACE) <<
"END CONSTRUCTOR" << TLOG_ENDL;
79 TLOG(TLVL_TRACE) <<
"DESTRUCTOR" << TLOG_ENDL;
81 TLOG(TLVL_TRACE) <<
"Destructor END" << TLOG_ENDL;
// Store one Fragment in the shared-memory buffer assigned to frag.sequence_id and
// re-evaluate whether that event is now complete.
//   frag    - header of the Fragment; supplies sequence_id, timestamp and word_count.
//   dataPtr - start of the bytes to copy; frag.word_count * sizeof(RawDataType) bytes are written.
// Returns false when no buffer could be obtained (getBufferForSequenceID_ gave -1).
// NOTE(review): the remaining return statements are not present in this listing.
84 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag,
void* dataPtr)
86 TLOG(TLVL_TRACE) <<
"AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
87 <<
", sequence_id=" << std::to_string(frag.sequence_id) << TLOG_ENDL;
// Look up the buffer for this sequence ID; create_new = true lets a fresh buffer be claimed.
88 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
89 TLOG(TLVL_TRACE) <<
"Using buffer " << std::to_string(buffer) << TLOG_ENDL;
// -1 means no buffer was produced; report failure so the caller can retry.
90 if (buffer == -1)
return false;
// NOTE(review): the condition guarding this message (listing line 91) is missing from this extract.
93 TLOG(TLVL_ERROR) <<
"Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
97 auto hdr = getEventHeader_(buffer);
// Stamp the event header with the current run/subrun.
// NOTE(review): listing line 98 is missing; presumably this is conditional on update_run_ids_ - confirm.
100 hdr->run_id = run_id_;
101 hdr->subrun_id = subrun_id_;
104 TLOG(TLVL_TRACE) <<
"AddFragment before Write calls" << TLOG_ENDL;
// word_count is in RawDataType words, so convert to bytes for Write().
105 Write(buffer, dataPtr, frag.word_count *
sizeof(RawDataType));
107 TLOG(TLVL_TRACE) <<
"Checking for complete event" << TLOG_ENDL;
108 auto fragmentCount = GetFragmentCount(frag.sequence_id);
// Complete only when every expected Fragment has arrived and no writer is still pending on this buffer.
109 hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
110 TLOG(TLVL_TRACE) <<
"hdr->is_complete=" << std::boolalpha << hdr->is_complete
111 <<
", fragmentCount=" << std::to_string(fragmentCount)
112 <<
", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
113 <<
", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]) << TLOG_ENDL;
// complete_buffer_ moves the buffer to the pending set if is_complete was just set.
115 complete_buffer_(buffer);
116 requests_.SendRequest(
true);
118 TLOG(TLVL_TRACE) <<
"AddFragment END" << TLOG_ENDL;
// Retry wrapper around AddFragment(header, ptr): keeps calling it, pausing 1000 us after
// each failure, until it succeeds or timeout_usec microseconds have elapsed.
//   frag         - Fragment to add (owned by this call).
//   timeout_usec - maximum time to keep retrying, in microseconds.
//   outfrag      - receives the Fragment back on the hand-back path below.
122 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag,
size_t timeout_usec, FragmentPtr& outfrag)
124 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) BEGIN" << TLOG_ENDL;
// Copy the raw header by value; `data` is the same header address, so the write starts at the header.
125 auto hdr = *
reinterpret_cast<detail::RawFragmentHeader*
>(frag->headerAddress());
126 auto data = frag->headerAddress();
127 auto start = std::chrono::steady_clock::now();
// NOTE(review): `sts` is declared on a line that is not present in this listing (listing line 128).
129 while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
131 sts = AddFragment(hdr, data);
// Back off briefly before the next attempt.
132 if (!sts) usleep(1000);
// NOTE(review): the guard for this hand-back (listing line 134) is missing; presumably it runs only when the add failed - confirm.
136 outfrag = std::move(frag);
138 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts << TLOG_ENDL;
144 TLOG(14) <<
"WriteFragmentHeader BEGIN" << TLOG_ENDL;
145 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
149 if (buffer == -1 && !dropIfNoBuffersAvailable)
return nullptr;
152 TLOG(TLVL_ERROR) <<
"Dropping fragment because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
156 TLOG(TLVL_ERROR) <<
"Dropping fragment because there is no room in the queue and reliable mode is off: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
158 dropped_data_.reset(
new Fragment(frag.word_count - frag.num_words()));
159 return dropped_data_->dataBegin();
162 buffer_writes_pending_[buffer]++;
163 TraceLock lk(buffer_mutexes_[buffer], 50,
"WriteFragmentHeader");
164 Write(buffer, &frag, frag.num_words() *
sizeof(RawDataType));
166 auto pos =
reinterpret_cast<RawDataType*
>(GetWritePos(buffer));
167 if (frag.word_count - frag.num_words() > 0) {
168 IncrementWritePos(buffer, (frag.word_count - frag.num_words()) *
sizeof(RawDataType));
171 TLOG(14) <<
"WriteFragmentHeader END" << TLOG_ENDL;
178 TLOG(TLVL_TRACE) <<
"DoneWritingFragment BEGIN" << TLOG_ENDL;
179 auto buffer = getBufferForSequenceID_(frag.sequence_id,
false, frag.timestamp);
180 if (buffer == -1) Detach(
true,
"SharedMemoryEventManager",
"getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
181 if (buffer == -2)
return;
182 TraceLock lk(buffer_mutexes_[buffer], 50,
"DoneWritingFragment");
184 auto hdr = getEventHeader_(buffer);
187 hdr->run_id = run_id_;
188 hdr->subrun_id = subrun_id_;
191 buffer_writes_pending_[buffer]--;
192 if (buffer_writes_pending_[buffer] != 0)
194 TLOG(TLVL_TRACE) <<
"Done writing fragment, but there's another writer. Not doing bookkeeping steps." << TLOG_ENDL;
197 auto frag_count = GetFragmentCount(frag.sequence_id);
198 hdr->is_complete = frag_count == num_fragments_per_event_;
199 #if ART_SUPPORTS_DUPLICATE_EVENTS
200 if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
201 hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
205 complete_buffer_(buffer);
206 requests_.SendRequest(
true);
207 TLOG(TLVL_TRACE) <<
"DoneWritingFragment END" << TLOG_ENDL;
212 return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID,
false), type);
217 if (buffer == -1)
return 0;
218 ResetReadPos(buffer);
219 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
223 while (MoreDataInBuffer(buffer))
225 auto fragHdr =
reinterpret_cast<artdaq::detail::RawFragmentHeader*
>(GetReadPos(buffer));
226 IncrementReadPos(buffer, fragHdr->word_count *
sizeof(RawDataType));
227 if (type != Fragment::InvalidFragmentType && fragHdr->type != type)
continue;
228 TLOG_TRACE(
"GetFragmentCount") <<
"Adding Fragment with size=" << std::to_string(fragHdr->word_count) <<
" to Fragment count" << TLOG_ENDL;
240 TLOG(TLVL_INFO) <<
"Starting art process with config file " << config_file->getFileName() << TLOG_ENDL;
241 std::vector<char*> args{ (
char*)
"art", (
char*)
"-c", &config_file->getFileName()[0], NULL };
246 execvp(
"art", &args[0]);
251 TLOG(TLVL_INFO) <<
"PID of new art process is " << pid << TLOG_ENDL;
252 art_processes_.insert(pid);
254 waitpid(pid, &status, 0);
255 TLOG(TLVL_INFO) <<
"Removing PID " << pid <<
" from process list" << TLOG_ENDL;
256 art_processes_.erase(pid);
259 TLOG(TLVL_INFO) <<
"art process " << pid <<
" exited normally, " << (restart_art_ ?
"restarting" :
"not restarting") << TLOG_ENDL;
263 TLOG(TLVL_WARNING) <<
"art process " << pid <<
" exited with status code 0x" << std::hex << status <<
" (" << std::dec << status <<
"), " << (restart_art_ ?
"restarting" :
"not restarting") << TLOG_ENDL;
271 if (num_art_processes_ == 0)
return;
272 for (
size_t ii = 0; ii < num_art_processes_; ++ii)
274 StartArtProcess(current_art_pset_);
280 static std::mutex start_art_mutex;
281 TraceLock lk(start_art_mutex, 15,
"StartArtLock");
283 auto initialCount = GetAttachedCount();
284 auto startTime = std::chrono::steady_clock::now();
286 if (pset != current_art_pset_)
288 current_art_pset_ = pset;
289 current_art_config_file_ = std::make_shared<art_config_file>(pset);
292 boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
296 while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
297 && TimeUtils::GetElapsedTime(startTime) < 5)
301 if (GetAttachedCount() - initialCount < 1 || pid <= 0)
303 TLOG(TLVL_WARNING) <<
"art process has not started after 5s. Check art configuration!"
304 <<
" (pid=" << pid <<
", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) <<
")" << TLOG_ENDL;
309 TLOG(TLVL_INFO) << std::setw(4) << std::fixed <<
"art initialization took "
310 << TimeUtils::GetElapsedTime(startTime) <<
" seconds." << TLOG_ENDL;
319 restart_art_ =
false;
320 current_art_config_file_ =
nullptr;
321 current_art_pset_ = fhicl::ParameterSet();
323 for (
auto pid : pids)
325 if (kill(pid, 0) >= 0)
330 if (pids.size() == 0)
332 TLOG(14) <<
"All art processes already exited, nothing to do." << TLOG_ENDL;
337 TLOG(TLVL_TRACE) <<
"Gently informing art processes that it is time to shut down" << TLOG_ENDL;
338 for (
auto pid : pids)
343 int graceful_wait_ms = 1000;
344 int int_wait_ms = 100;
346 TLOG(TLVL_TRACE) <<
"Waiting up to " << graceful_wait_ms <<
" ms for all art processes to exit gracefully" << TLOG_ENDL;
347 for (
int ii = 0; ii < graceful_wait_ms; ++ii)
351 for (
auto pid : pids)
353 if (kill(pid, 0) < 0)
358 if (pids.size() == 0)
360 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms." << TLOG_ENDL;
365 TLOG(TLVL_TRACE) <<
"Insisting that the art processes shut down" << TLOG_ENDL;
366 for (
auto pid : pids)
371 TLOG(TLVL_TRACE) <<
"Waiting up to " << int_wait_ms <<
" ms for all art processes to exit" << TLOG_ENDL;
372 for (
int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
376 for (
auto pid : pids)
378 if (kill(pid, 0) < 0)
384 if (pids.size() == 0)
386 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms." << TLOG_ENDL;
391 TLOG(TLVL_TRACE) <<
"Killing remaning art processes with extreme prejudice" << TLOG_ENDL;
392 while (pids.size() > 0)
394 kill(*pids.begin(), SIGKILL);
400 TLOG(TLVL_DEBUG) <<
"ReconfigureArt BEGIN" << TLOG_ENDL;
405 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
407 broadcasts_.MarkBufferEmpty(ii,
true);
409 if (newRun == 0) newRun = run_id_ + 1;
410 current_art_pset_ = art_pset;
411 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
413 if (n_art_processes != -1)
415 TLOG(TLVL_INFO) <<
"Setting number of art processes to " << n_art_processes << TLOG_ENDL;
416 num_art_processes_ = n_art_processes;
419 TLOG(TLVL_DEBUG) <<
"ReconfigureArt END" << TLOG_ENDL;
424 init_fragment_.reset(
nullptr);
425 TLOG(TLVL_TRACE) <<
"SharedMemoryEventManager::endOfData" << TLOG_ENDL;
426 restart_art_ =
false;
428 size_t initialStoreSize = GetIncompleteEventCount();
429 TLOG(TLVL_TRACE) <<
"endOfData: Flushing " << initialStoreSize
430 <<
" stale events from the SharedMemoryEventManager." << TLOG_ENDL;
431 int counter = initialStoreSize;
432 while (active_buffers_.size() > 0 && counter > 0)
434 complete_buffer_(*active_buffers_.begin());
437 TLOG(TLVL_TRACE) <<
"endOfData: Done flushing, there are now " << GetIncompleteEventCount()
438 <<
" stale events in the SharedMemoryEventManager." << TLOG_ENDL;
441 TLOG(TLVL_TRACE) <<
"Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) <<
" outstanding buffers..." << TLOG_ENDL;
442 auto start = std::chrono::steady_clock::now();
443 auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
446 while (lastReadCount > 0 && TimeUtils::GetElapsedTime(start) < 1)
448 auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
449 if (temp != lastReadCount)
451 TLOG(TLVL_TRACE) <<
"Waiting for " << std::to_string(temp) <<
" outstanding buffers..." << TLOG_ENDL;
452 lastReadCount = temp;
453 start = std::chrono::steady_clock::now();
455 if (lastReadCount > 0) usleep(1000);
458 TLOG(TLVL_TRACE) <<
"endOfData: Broadcasting EndOfData Fragment" << TLOG_ENDL;
459 FragmentPtr outFrag = std::move(Fragment::eodFrag(GetBufferCount()));
460 bool success = broadcastFragment_(std::move(outFrag), outFrag);
463 TLOG(TLVL_TRACE) <<
"endOfData: Clearing buffers to make room for EndOfData Fragment" << TLOG_ENDL;
464 for (
size_t ii = 0; ii < size(); ++ii)
466 broadcasts_.MarkBufferEmpty(ii,
true);
468 broadcastFragment_(std::move(outFrag), outFrag);
471 TLOG(TLVL_DEBUG) <<
"Waiting for all art processes to exit, there are " << std::to_string(art_processes_.size()) <<
" remaining." << TLOG_ENDL;
472 while (art_processes_.size() > 0)
474 ShutdownArtProcesses(art_processes_);
476 ResetAttachedCount();
478 TLOG(TLVL_TRACE) <<
"endOfData: Clearing buffers" << TLOG_ENDL;
479 for (
size_t ii = 0; ii < size(); ++ii)
481 MarkBufferEmpty(ii,
true);
483 released_incomplete_events_.clear();
485 TLOG(TLVL_TRACE) <<
"endOfData END" << TLOG_ENDL;
486 TLOG(TLVL_INFO) <<
"EndOfData Complete. There were " << GetLastSeenBufferID() <<
" events processed in this run." << TLOG_ENDL;
492 init_fragment_.reset(
nullptr);
496 requests_.SendRoutingToken(queue_size_);
497 TLOG(TLVL_DEBUG) <<
"Starting run " << run_id_
498 <<
", max queue size = "
501 << GetLockedBufferCount() << TLOG_ENDL;
504 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
505 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
514 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
515 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
521 FragmentPtr endOfRunFrag(
new
522 Fragment(static_cast<size_t>
523 (ceil(
sizeof(my_rank) /
524 static_cast<double>(
sizeof(Fragment::value_type))))));
526 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
527 *endOfRunFrag->dataBegin() = my_rank;
528 broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
535 std::unique_ptr<artdaq::Fragment>
537 Fragment(static_cast<size_t>
538 (ceil(
sizeof(my_rank) /
539 static_cast<double>(
sizeof(Fragment::value_type))))));
541 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
542 *endOfSubrunFrag->dataBegin() = my_rank;
544 broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
546 TLOG(TLVL_INFO) <<
"Subrun " << subrun_id_ <<
" in run " << run_id_ <<
" has ended. There were " << subrun_event_count_ <<
" events in this subrun." << TLOG_ENDL;
547 subrun_event_count_ = 0;
556 metricMan->sendMetric(
"Incomplete Event Count", GetIncompleteEventCount(),
"events", 1, MetricMode::LastPoint);
557 metricMan->sendMetric(
"Pending Event Count", GetPendingEventCount(),
"events", 1, MetricMode::LastPoint);
560 if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
562 if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
565 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
566 std::ostringstream oss;
567 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
568 for (
auto& ev : active_buffers_)
570 auto hdr = getEventHeader_(ev);
571 oss << hdr->sequence_id <<
" (" << GetFragmentCount(hdr->sequence_id) <<
"), ";
573 TLOG(TLVL_DEBUG) << oss.str() << TLOG_ENDL;
// Write `frag` as a complete, single-Fragment event into the broadcast shared-memory
// segment (broadcasts_), retrying for a free buffer until broadcast_timeout_ms_ has elapsed.
//   frag    - Fragment to broadcast.
//   outFrag - out-parameter. NOTE(review): the statements that assign it are not present in this listing.
577 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
// First attempt; the loop below retries while -1 keeps coming back.
579 auto buffer = broadcasts_.GetBufferForWriting(
false);
580 auto start_time = std::chrono::steady_clock::now();
// broadcast_timeout_ms_ is an int; cast so the comparison with the elapsed time is unsigned on both sides.
581 while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
584 buffer = broadcasts_.GetBufferForWriting(
false);
// NOTE(review): the condition enclosing this error (listing line 586) is missing from this extract.
588 TLOG(TLVL_ERROR) <<
"Broadcast of fragment type " << frag->typeString() <<
" failed due to timeout waiting for buffer!" << TLOG_ENDL;
// Fill in the event header at the start of the buffer, then advance the write position past it.
593 auto hdr =
reinterpret_cast<detail::RawEventHeader*
>(broadcasts_.GetBufferStart(buffer));
594 hdr->run_id = run_id_;
595 hdr->subrun_id = subrun_id_;
596 hdr->sequence_id = frag->sequenceID();
597 hdr->is_complete =
true;
598 broadcasts_.IncrementWritePos(buffer,
sizeof(detail::RawEventHeader));
600 TLOG(TLVL_TRACE) <<
"broadcastFragment_ before Write calls" << TLOG_ENDL;
// frag->size() is in RawDataType words; the copy starts at the Fragment's header address.
601 broadcasts_.Write(buffer, frag->headerAddress(), frag->size() *
sizeof(RawDataType));
603 broadcasts_.MarkBufferFull(buffer, -1);
608 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(
int buffer)
610 return reinterpret_cast<detail::RawEventHeader*
>(GetBufferStart(buffer));
613 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID,
bool create_new, Fragment::timestamp_t timestamp)
615 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
616 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" BEGIN" << TLOG_ENDL;
617 auto buffers = GetBuffersOwnedByManager();
618 for (
auto& buf : buffers)
620 auto hdr = getEventHeader_(buf);
621 if (hdr->sequence_id == seqID)
623 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning " << buf << TLOG_ENDL;
628 #if !ART_SUPPORTS_DUPLICATE_EVENTS
629 if (released_incomplete_events_.count(seqID)) {
630 TLOG(TLVL_ERROR) <<
"Buffer has already been marked \"Incomplete\" and sent to art!" << TLOG_ENDL;
635 if (!create_new)
return -1;
637 check_pending_buffers_(lk);
638 int new_buffer = GetBufferForWriting(
false);
640 if (new_buffer == -1)
642 new_buffer = GetBufferForWriting(overwrite_mode_);
645 if (new_buffer == -1)
return -1;
646 TraceLock(buffer_mutexes_[new_buffer], 34,
"getBufferForSequenceID");
647 auto hdr = getEventHeader_(new_buffer);
648 hdr->is_complete =
false;
649 hdr->run_id = run_id_;
650 hdr->subrun_id = subrun_id_;
651 hdr->sequence_id = seqID;
652 buffer_writes_pending_[new_buffer] = 0;
653 IncrementWritePos(new_buffer,
sizeof(detail::RawEventHeader));
655 active_buffers_.insert(new_buffer);
657 if (timestamp != Fragment::InvalidTimestamp)
659 requests_.AddRequest(seqID, timestamp);
661 requests_.SendRequest();
662 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning newly initialized buffer " << new_buffer << TLOG_ENDL;
// Report whether `buffer` contains any data beyond its RawEventHeader.
//   buffer - buffer index; -1 is answered with true without touching any buffer.
666 bool artdaq::SharedMemoryEventManager::hasFragments_(
int buffer)
668 if (buffer == -1)
return true;
// NOTE(review): the body of this branch (listing lines 670-672) is missing from this extract.
669 if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
// Rewind the read position and skip the event header; whatever remains is Fragment data.
673 ResetReadPos(buffer);
674 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
675 return MoreDataInBuffer(buffer);
678 void artdaq::SharedMemoryEventManager::complete_buffer_(
int buffer)
680 auto hdr = getEventHeader_(buffer);
681 if (hdr->is_complete)
683 TLOG(TLVL_DEBUG) <<
"complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) <<
"." << TLOG_ENDL;
685 requests_.RemoveRequest(hdr->sequence_id);
686 requests_.SendRoutingToken(1);
688 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
689 active_buffers_.erase(buffer);
690 pending_buffers_.insert(buffer);
693 check_pending_buffers_();
696 bool artdaq::SharedMemoryEventManager::bufferComparator(
int bufA,
int bufB)
698 return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
// Housekeeping pass over the buffers owned by this manager:
//   1. an active buffer for which ResetBuffer() returns true is logged as stale, its request is
//      withdrawn, a routing token is sent, and it moves from active_buffers_ to pending_buffers_;
//   2. pending buffers are visited in ascending sequence-ID order and handled until one has a
//      sequence ID above the lowest one still active;
//   3. buffer-occupancy metrics are sent.
// `lock` is only inspected for the trace message in the code visible here.
701 void artdaq::SharedMemoryEventManager::check_pending_buffers_(std::unique_lock<std::mutex>
const& lock)
703 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ BEGIN Locked=" << std::boolalpha << lock.owns_lock() << TLOG_ENDL;
705 auto buffers = GetBuffersOwnedByManager();
706 for (
auto buf : buffers)
// Buffers already waiting in pending_buffers_ are skipped here.
708 if (ResetBuffer(buf) && !pending_buffers_.count(buf))
710 auto hdr = getEventHeader_(buf);
711 if (active_buffers_.count(buf))
713 TLOG(TLVL_WARNING) <<
"Active event " << std::to_string(hdr->sequence_id) <<
" is stale. Scheduling release of incomplete event to art." << TLOG_ENDL;
714 requests_.RemoveRequest(hdr->sequence_id);
715 requests_.SendRoutingToken(1);
716 active_buffers_.erase(buf);
717 pending_buffers_.insert(buf);
// First release of this sequence ID: record how many Fragments are still missing.
718 if (!released_incomplete_events_.count(hdr->sequence_id)) {
719 released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
// NOTE(review): listing line 721 is missing; presumably this is the else branch for an already-recorded sequence ID - confirm.
722 released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
// Starts at InvalidSequenceID and is only lowered by the scan over active_buffers_ below.
729 Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
// NOTE(review): braces are missing from this extract; presumably the scan below is nested under this check - confirm.
732 if (WriteReadyCount(
false) != 0)
734 for (
auto buf : active_buffers_)
736 auto hdr = getEventHeader_(buf);
737 TLOG(TLVL_TRACE) <<
"Buffer: " << buf <<
", SeqID: " << std::to_string(hdr->sequence_id) <<
", ACTIVE" << TLOG_ENDL;
738 if (hdr->sequence_id < lowestSeqId)
740 lowestSeqId = hdr->sequence_id;
743 TLOG(TLVL_TRACE) <<
"Lowest SeqID held: " << std::to_string(lowestSeqId) << TLOG_ENDL;
// Work on a sorted copy so pending_buffers_ can be erased from while iterating.
746 std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
747 sorted_buffers.sort([
this](
int a,
int b) {
return bufferComparator(a, b); });
748 for (
auto buf : sorted_buffers)
750 auto hdr = getEventHeader_(buf);
// The list is sorted, so the first buffer above the threshold ends the pass.
751 if (hdr->sequence_id > lowestSeqId)
break;
752 TLOG(TLVL_DEBUG) <<
"Releasing event " << std::to_string(hdr->sequence_id) <<
" in buffer " << buf <<
" to art." << TLOG_ENDL;
// NOTE(review): listing line 753 is missing; presumably the call that hands the buffer to readers - confirm.
754 subrun_event_count_++;
755 pending_buffers_.erase(buf);
758 TLOG(TLVL_TRACE) <<
"check_pending_buffers_: Sending Metrics" << TLOG_ENDL;
// NOTE(review): the declaration of `total` (listing line 763) and any guard on metricMan (line 759) are missing from this extract.
761 auto full = ReadReadyCount();
762 auto empty = WriteReadyCount(overwrite_mode_);
764 metricMan->sendMetric(
"Shared Memory Full Buffers", full,
"buffers", 2, MetricMode::LastPoint);
765 metricMan->sendMetric(
"Shared Memory Available Buffers", empty,
"buffers", 2, MetricMode::LastPoint);
// Cast to double so the percentage is not truncated by integer division.
766 metricMan->sendMetric(
"Shared Memory Full %", full * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
767 metricMan->sendMetric(
"Shared Memory Available %", empty * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
769 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ END" << TLOG_ENDL;
// Broadcast the stored Init Fragment if one is held; otherwise, when send_init_fragments_
// is set, warn that none has been received yet.
772 void artdaq::SharedMemoryEventManager::send_init_frag_()
774 if (init_fragment_ !=
nullptr)
776 TLOG(TLVL_TRACE) <<
"Sending init Fragment to art..." << TLOG_ENDL;
// Dumps the Init Fragment payload to receiveInitMessage_<rank>.bin.
// NOTE(review): listing lines 778 and 782-783 are missing; cannot tell from here whether this dump is compiled in - confirm.
779 std::string fileName =
"receiveInitMessage_" + std::to_string(my_rank) +
".bin";
780 std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
781 ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
// init_fragment_ is both the moved-from argument and the out-parameter of the broadcast.
785 broadcastFragment_(std::move(init_fragment_), init_fragment_);
786 TLOG(TLVL_TRACE) <<
"Init Fragment sent" << TLOG_ENDL;
788 else if (send_init_fragments_)
790 TLOG(TLVL_WARNING) <<
"Cannot send init fragment because I haven't yet received one!" << TLOG_ENDL;
796 if (!init_fragment_ || init_fragment_ ==
nullptr)
798 init_fragment_.swap(frag);
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void StartArt()
Start all the art processes.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not already been set.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
bool endOfData()
Indicate that the end of input has been reached to the art processes.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.