// Name under which this file's trace messages are filed: "<app_name>_SharedMemoryEventManager".
// NOTE(review): the expansion concatenates into a temporary and takes c_str() of it, so the
// resulting pointer is only valid until the end of the full expression in which the macro is
// expanded (assuming app_name is a std::string) - confirm no user of TRACE_NAME stores it.
2 #define TRACE_NAME (app_name + "_SharedMemoryEventManager").c_str()
4 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
5 #include "artdaq-core/Core/StatisticsCollection.hh"
6 #include "artdaq-core/Utilities/TraceLock.hh"
8 #include "SharedMemoryEventManager.hh"
// Out-of-class definition of the static member mutex (one instance for the whole class).
// In this file it is locked by getBufferForSequenceID_ and by complete_buffer_ around their
// accesses to active_buffers_ / pending_buffers_.
10 std::mutex artdaq::SharedMemoryEventManager::sequence_id_mutex_;
13 : SharedMemoryManager(pset.get<uint32_t>(
"shared_memory_key", 0xBEE70000 + getpid()),
14 pset.get<size_t>(
"buffer_count"),
15 pset.has_key(
"max_event_size_bytes") ? pset.get<size_t>(
"max_event_size_bytes") : pset.get<size_t>(
"expected_fragments_per_event") * pset.get<size_t>(
"max_fragment_size_bytes"),
16 pset.get<size_t>(
"stale_buffer_timeout_usec", pset.get<size_t>(
"event_queue_wait_time", 5) * 1000000),
17 !pset.get<bool>(
"broadcast_mode", false))
18 , num_art_processes_(pset.get<size_t>(
"art_analyzer_count", 1))
19 , num_fragments_per_event_(pset.get<size_t>(
"expected_fragments_per_event"))
20 , queue_size_(pset.get<size_t>(
"buffer_count"))
23 , update_run_ids_(pset.get<bool>(
"update_run_ids_on_new_fragment", true))
24 , overwrite_mode_(!pset.get<bool>(
"use_art", true) || pset.get<bool>(
"overwrite_mode", false) || pset.get<bool>(
"broadcast_mode", false))
25 , send_init_fragments_(pset.get<bool>(
"send_init_fragments", true))
26 , buffer_writes_pending_()
27 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
28 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
29 , broadcast_timeout_ms_(pset.get<int>(
"fragment_broadcast_timeout_ms", 3000))
31 , subrun_event_count_(0)
34 , current_art_pset_(art_pset)
36 , broadcasts_(pset.get<uint32_t>(
"broadcast_shared_memory_key", 0xCEE70000 + getpid()),
37 pset.get<size_t>(
"broadcast_buffer_count", 10),
38 pset.get<size_t>(
"broadcast_buffer_size", 0x100000),
39 pset.get<int>(
"fragment_broadcast_timeout_ms", 3000) * 1000, false)
41 SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
42 broadcasts_.SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
44 if (pset.get<
bool>(
"use_art",
true) ==
false) {
45 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false" << TLOG_ENDL;
46 num_art_processes_ = 0;
49 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true" << TLOG_ENDL;
50 TLOG(TLVL_TRACE) <<
"art_pset is " << art_pset.to_string() << TLOG_ENDL;
52 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
54 if (overwrite_mode_ && num_art_processes_ > 0)
56 TLOG(TLVL_WARNING) <<
"Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!" << TLOG_ENDL;
58 else if (overwrite_mode_)
60 TLOG(TLVL_INFO) <<
"Overwrite Mode enabled, no configured art processes at startup" << TLOG_ENDL;
63 for (
size_t ii = 0; ii < size(); ++ii)
65 buffer_writes_pending_[ii] = 0;
68 if (!IsValid())
throw cet::exception(app_name +
"_SharedMemoryEventManager") <<
"Unable to attach to Shared Memory!";
70 TLOG(TLVL_TRACE) <<
"Setting Writer rank to " << my_rank << TLOG_ENDL;
72 TLOG(TLVL_DEBUG) <<
"Writer Rank is " << GetRank() << TLOG_ENDL;
75 TLOG(TLVL_TRACE) <<
"END CONSTRUCTOR" << TLOG_ENDL;
80 TLOG(TLVL_TRACE) <<
"DESTRUCTOR" << TLOG_ENDL;
82 TLOG(TLVL_TRACE) <<
"Destructor END" << TLOG_ENDL;
/// Copy frag.word_count RawDataType words from dataPtr into the shared-memory buffer
/// assigned to frag.sequence_id, then re-evaluate whether that event is complete.
///
/// @param frag    Header of the Fragment being added (passed by value).
/// @param dataPtr Start of the bytes to copy; the FragmentPtr overload in this file passes
///                frag->headerAddress().
/// @return false when getBufferForSequenceID_ yields -1.
///
/// NOTE(review): the embedded numbering of this listing skips lines (e.g. 92-93, 99-100,
/// 115, 120-121), so the braces, the guards around the "Dropping event" message, the
/// run-id update and complete_buffer_, and the final return statement are not visible here.
85 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag,
void* dataPtr)
87 TLOG(TLVL_TRACE) <<
"AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
88 <<
", sequence_id=" << std::to_string(frag.sequence_id) << TLOG_ENDL;
// create_new == true: a buffer is claimed for this sequence ID if none holds it yet.
89 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
90 TLOG(TLVL_TRACE) <<
"Using buffer " << std::to_string(buffer) << TLOG_ENDL;
// -1 means no buffer could be found or claimed; report failure to the caller.
91 if (buffer == -1)
return false;
// NOTE(review): the condition guarding this message (listing lines 92-93) is missing.
94 TLOG(TLVL_ERROR) <<
"Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
98 auto hdr = getEventHeader_(buffer);
// Stamp the event header with the manager's current run and subrun.
101 hdr->run_id = run_id_;
102 hdr->subrun_id = subrun_id_;
105 TLOG(TLVL_TRACE) <<
"AddFragment before Write calls" << TLOG_ENDL;
// word_count is expressed in RawDataType words, hence the sizeof scaling to bytes.
106 Write(buffer, dataPtr, frag.word_count *
sizeof(RawDataType));
108 TLOG(TLVL_TRACE) <<
"Checking for complete event" << TLOG_ENDL;
109 auto fragmentCount = GetFragmentCount(frag.sequence_id);
// Complete only when the expected Fragment count is reached and no write is still pending.
110 hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
111 TLOG(TLVL_TRACE) <<
"hdr->is_complete=" << std::boolalpha << hdr->is_complete
112 <<
", fragmentCount=" << std::to_string(fragmentCount)
113 <<
", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
114 <<
", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]) << TLOG_ENDL;
116 complete_buffer_(buffer);
117 requests_.SendRequest(
true);
119 TLOG(TLVL_TRACE) <<
"AddFragment END" << TLOG_ENDL;
/// Add a whole Fragment, retrying the (header, pointer) overload until it succeeds or
/// timeout_usec microseconds have elapsed.
///
/// @param frag         Fragment to add (ownership passed in by value).
/// @param timeout_usec Retry budget in microseconds, measured from entry.
/// @param outfrag      Receives frag via std::move; NOTE(review): the condition for that
///                     hand-back (listing lines 134-136) is missing - presumably failure only.
///
/// NOTE(review): the declaration of sts (listing line 129) and the return statement are not
/// visible in this listing.
123 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag,
size_t timeout_usec, FragmentPtr& outfrag)
125 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) BEGIN" << TLOG_ENDL;
// hdr is a by-value copy of the RawFragmentHeader found at the Fragment's header address.
126 auto hdr = *
reinterpret_cast<detail::RawFragmentHeader*
>(frag->headerAddress());
127 auto data = frag->headerAddress();
128 auto start = std::chrono::steady_clock::now();
// Retry loop: one attempt per pass, sleeping 1000 us after each failed attempt.
130 while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
132 sts = AddFragment(hdr, data);
133 if (!sts) usleep(1000);
137 outfrag = std::move(frag);
139 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts << TLOG_ENDL;
145 TLOG(14) <<
"WriteFragmentHeader BEGIN" << TLOG_ENDL;
146 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
150 if (buffer == -1 && !dropIfNoBuffersAvailable)
return nullptr;
153 TLOG(TLVL_ERROR) <<
"Dropping fragment because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
157 TLOG(TLVL_ERROR) <<
"Dropping fragment because there is no room in the queue and reliable mode is off: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
159 dropped_data_.reset(
new Fragment(frag.word_count - frag.num_words()));
160 return dropped_data_->dataBegin();
163 buffer_writes_pending_[buffer]++;
164 TraceLock lk(buffer_mutexes_[buffer], 50,
"WriteFragmentHeader");
165 Write(buffer, &frag, frag.num_words() *
sizeof(RawDataType));
167 auto pos =
reinterpret_cast<RawDataType*
>(GetWritePos(buffer));
168 if (frag.word_count - frag.num_words() > 0) {
169 IncrementWritePos(buffer, (frag.word_count - frag.num_words()) *
sizeof(RawDataType));
172 TLOG(14) <<
"WriteFragmentHeader END" << TLOG_ENDL;
179 TLOG(TLVL_TRACE) <<
"DoneWritingFragment BEGIN" << TLOG_ENDL;
180 auto buffer = getBufferForSequenceID_(frag.sequence_id,
false, frag.timestamp);
181 if (buffer == -1) Detach(
true,
"SharedMemoryEventManager",
"getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
182 if (buffer == -2)
return;
183 TraceLock lk(buffer_mutexes_[buffer], 50,
"DoneWritingFragment");
185 auto hdr = getEventHeader_(buffer);
188 hdr->run_id = run_id_;
189 hdr->subrun_id = subrun_id_;
192 buffer_writes_pending_[buffer]--;
193 if (buffer_writes_pending_[buffer] != 0)
195 TLOG(TLVL_TRACE) <<
"Done writing fragment, but there's another writer. Not doing bookkeeping steps." << TLOG_ENDL;
198 auto frag_count = GetFragmentCount(frag.sequence_id);
199 hdr->is_complete = frag_count == num_fragments_per_event_;
200 #if ART_SUPPORTS_DUPLICATE_EVENTS
201 if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
202 hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
206 complete_buffer_(buffer);
207 requests_.SendRequest(
true);
208 TLOG(TLVL_TRACE) <<
"DoneWritingFragment END" << TLOG_ENDL;
213 return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID,
false), type);
218 if (buffer == -1)
return 0;
219 ResetReadPos(buffer);
220 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
224 while (MoreDataInBuffer(buffer))
226 auto fragHdr =
reinterpret_cast<artdaq::detail::RawFragmentHeader*
>(GetReadPos(buffer));
227 IncrementReadPos(buffer, fragHdr->word_count *
sizeof(RawDataType));
228 if (type != Fragment::InvalidFragmentType && fragHdr->type != type)
continue;
229 TLOG_TRACE(
"GetFragmentCount") <<
"Adding Fragment with size=" << std::to_string(fragHdr->word_count) <<
" to Fragment count" << TLOG_ENDL;
241 TLOG(TLVL_INFO) <<
"Starting art process with config file " << config_file->getFileName() << TLOG_ENDL;
242 std::vector<char*> args{ (
char*)
"art", (
char*)
"-c", &config_file->getFileName()[0], NULL };
247 execvp(
"art", &args[0]);
252 TLOG(TLVL_INFO) <<
"PID of new art process is " << pid << TLOG_ENDL;
253 art_processes_.insert(pid);
255 waitpid(pid, &status, 0);
256 TLOG(TLVL_INFO) <<
"Removing PID " << pid <<
" from process list" << TLOG_ENDL;
257 art_processes_.erase(pid);
260 TLOG(TLVL_INFO) <<
"art process " << pid <<
" exited normally, " << (restart_art_ ?
"restarting" :
"not restarting") << TLOG_ENDL;
264 TLOG(TLVL_WARNING) <<
"art process " << pid <<
" exited with status code 0x" << std::hex << status <<
" (" << std::dec << status <<
"), " << (restart_art_ ?
"restarting" :
"not restarting") << TLOG_ENDL;
272 if (num_art_processes_ == 0)
return;
273 for (
size_t ii = 0; ii < num_art_processes_; ++ii)
275 StartArtProcess(current_art_pset_);
281 static std::mutex start_art_mutex;
282 TraceLock lk(start_art_mutex, 15,
"StartArtLock");
284 auto initialCount = GetAttachedCount();
285 auto startTime = std::chrono::steady_clock::now();
287 if (pset != current_art_pset_)
289 current_art_pset_ = pset;
290 current_art_config_file_ = std::make_shared<art_config_file>(pset);
293 boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
297 while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
298 && TimeUtils::GetElapsedTime(startTime) < 5)
302 if (GetAttachedCount() - initialCount < 1 || pid <= 0)
304 TLOG(TLVL_WARNING) <<
"art process has not started after 5s. Check art configuration!"
305 <<
" (pid=" << pid <<
", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) <<
")" << TLOG_ENDL;
310 TLOG(TLVL_INFO) << std::setw(4) << std::fixed <<
"art initialization took "
311 << TimeUtils::GetElapsedTime(startTime) <<
" seconds." << TLOG_ENDL;
320 restart_art_ =
false;
321 current_art_config_file_ =
nullptr;
322 current_art_pset_ = fhicl::ParameterSet();
324 for (
auto pid : pids)
326 if (kill(pid, 0) >= 0)
331 if (pids.size() == 0)
333 TLOG(14) <<
"All art processes already exited, nothing to do." << TLOG_ENDL;
338 TLOG(TLVL_TRACE) <<
"Gently informing art processes that it is time to shut down" << TLOG_ENDL;
339 for (
auto pid : pids)
344 int graceful_wait_ms = 1000;
345 int int_wait_ms = 100;
347 TLOG(TLVL_TRACE) <<
"Waiting up to " << graceful_wait_ms <<
" ms for all art processes to exit gracefully" << TLOG_ENDL;
348 for (
int ii = 0; ii < graceful_wait_ms; ++ii)
352 for (
auto pid : pids)
354 if (kill(pid, 0) < 0)
359 if (pids.size() == 0)
361 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms." << TLOG_ENDL;
366 TLOG(TLVL_TRACE) <<
"Insisting that the art processes shut down" << TLOG_ENDL;
367 for (
auto pid : pids)
372 TLOG(TLVL_TRACE) <<
"Waiting up to " << int_wait_ms <<
" ms for all art processes to exit" << TLOG_ENDL;
373 for (
int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
377 for (
auto pid : pids)
379 if (kill(pid, 0) < 0)
385 if (pids.size() == 0)
387 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms." << TLOG_ENDL;
392 TLOG(TLVL_TRACE) <<
"Killing remaning art processes with extreme prejudice" << TLOG_ENDL;
393 while (pids.size() > 0)
395 kill(*pids.begin(), SIGKILL);
401 TLOG(TLVL_DEBUG) <<
"ReconfigureArt BEGIN" << TLOG_ENDL;
406 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
408 broadcasts_.MarkBufferEmpty(ii,
true);
410 if (newRun == 0) newRun = run_id_ + 1;
411 current_art_pset_ = art_pset;
412 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
414 if (n_art_processes != -1)
416 TLOG(TLVL_INFO) <<
"Setting number of art processes to " << n_art_processes << TLOG_ENDL;
417 num_art_processes_ = n_art_processes;
420 TLOG(TLVL_DEBUG) <<
"ReconfigureArt END" << TLOG_ENDL;
425 init_fragment_.reset(
nullptr);
426 TLOG(TLVL_TRACE) <<
"SharedMemoryEventManager::endOfData" << TLOG_ENDL;
427 restart_art_ =
false;
429 size_t initialStoreSize = GetIncompleteEventCount();
430 TLOG(TLVL_TRACE) <<
"endOfData: Flushing " << initialStoreSize
431 <<
" stale events from the SharedMemoryEventManager." << TLOG_ENDL;
432 int counter = initialStoreSize;
433 while (active_buffers_.size() > 0 && counter > 0)
435 complete_buffer_(*active_buffers_.begin());
438 TLOG(TLVL_TRACE) <<
"endOfData: Done flushing, there are now " << GetIncompleteEventCount()
439 <<
" stale events in the SharedMemoryEventManager." << TLOG_ENDL;
442 TLOG(TLVL_TRACE) <<
"Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) <<
" outstanding buffers..." << TLOG_ENDL;
443 auto start = std::chrono::steady_clock::now();
444 auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
447 while (lastReadCount > 0 && TimeUtils::GetElapsedTime(start) < 1)
449 auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
450 if (temp != lastReadCount)
452 TLOG(TLVL_TRACE) <<
"Waiting for " << std::to_string(temp) <<
" outstanding buffers..." << TLOG_ENDL;
453 lastReadCount = temp;
454 start = std::chrono::steady_clock::now();
456 if (lastReadCount > 0) usleep(1000);
459 TLOG(TLVL_TRACE) <<
"endOfData: Broadcasting EndOfData Fragment" << TLOG_ENDL;
460 FragmentPtr outFrag = std::move(Fragment::eodFrag(GetBufferCount()));
461 bool success = broadcastFragment_(std::move(outFrag), outFrag);
464 TLOG(TLVL_TRACE) <<
"endOfData: Clearing buffers to make room for EndOfData Fragment" << TLOG_ENDL;
465 for (
size_t ii = 0; ii < size(); ++ii)
467 broadcasts_.MarkBufferEmpty(ii,
true);
469 broadcastFragment_(std::move(outFrag), outFrag);
472 TLOG(TLVL_DEBUG) <<
"Waiting for all art processes to exit, there are " << std::to_string(art_processes_.size()) <<
" remaining." << TLOG_ENDL;
473 while (art_processes_.size() > 0)
475 ShutdownArtProcesses(art_processes_);
477 ResetAttachedCount();
479 TLOG(TLVL_TRACE) <<
"endOfData: Clearing buffers" << TLOG_ENDL;
480 for (
size_t ii = 0; ii < size(); ++ii)
482 MarkBufferEmpty(ii,
true);
484 released_incomplete_events_.clear();
486 TLOG(TLVL_TRACE) <<
"endOfData END" << TLOG_ENDL;
487 TLOG(TLVL_INFO) <<
"EndOfData Complete. There were " << GetLastSeenBufferID() <<
" events processed in this run." << TLOG_ENDL;
493 init_fragment_.reset(
nullptr);
497 requests_.SendRoutingToken(queue_size_);
498 TLOG(TLVL_DEBUG) <<
"Starting run " << run_id_
499 <<
", max queue size = "
502 << GetLockedBufferCount() << TLOG_ENDL;
505 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
506 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
515 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
516 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
522 FragmentPtr endOfRunFrag(
new
523 Fragment(static_cast<size_t>
524 (ceil(
sizeof(my_rank) /
525 static_cast<double>(
sizeof(Fragment::value_type))))));
527 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
528 *endOfRunFrag->dataBegin() = my_rank;
529 broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
536 std::unique_ptr<artdaq::Fragment>
538 Fragment(static_cast<size_t>
539 (ceil(
sizeof(my_rank) /
540 static_cast<double>(
sizeof(Fragment::value_type))))));
542 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
543 *endOfSubrunFrag->dataBegin() = my_rank;
545 broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
547 TLOG(TLVL_INFO) <<
"Subrun " << subrun_id_ <<
" in run " << run_id_ <<
" has ended. There were " << subrun_event_count_ <<
" events in this subrun." << TLOG_ENDL;
548 subrun_event_count_ = 0;
557 metricMan->sendMetric(
"Incomplete Event Count", GetIncompleteEventCount(),
"events", 1, MetricMode::LastPoint);
558 metricMan->sendMetric(
"Pending Event Count", GetPendingEventCount(),
"events", 1, MetricMode::LastPoint);
561 if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
563 if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
566 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
567 std::ostringstream oss;
568 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
569 for (
auto& ev : active_buffers_)
571 auto hdr = getEventHeader_(ev);
572 oss << hdr->sequence_id <<
" (" << GetFragmentCount(hdr->sequence_id) <<
"), ";
574 TLOG(TLVL_DEBUG) << oss.str() << TLOG_ENDL;
/// Write one Fragment, preceded by a RawEventHeader, into a buffer of the broadcasts_
/// shared-memory manager and mark that buffer full.
///
/// @param frag    Fragment to broadcast (taken by value).
/// @param outFrag Not referenced in the visible lines. NOTE(review): presumably receives
///                frag back when the broadcast times out - listing lines 590-592 are missing.
///
/// NOTE(review): both return statements, the body of the wait loop before line 585 and the
/// guard around the timeout error are not visible in this listing.
578 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
580 auto buffer = broadcasts_.GetBufferForWriting(
false);
581 auto start_time = std::chrono::steady_clock::now();
// Keep asking for a buffer until one is granted or broadcast_timeout_ms_ has elapsed.
582 while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
585 buffer = broadcasts_.GetBufferForWriting(
false);
589 TLOG(TLVL_ERROR) <<
"Broadcast of fragment type " << frag->typeString() <<
" failed due to timeout waiting for buffer!" << TLOG_ENDL;
// Fill in the event header at the start of the broadcast buffer; broadcasts are always
// flagged complete.
594 auto hdr =
reinterpret_cast<detail::RawEventHeader*
>(broadcasts_.GetBufferStart(buffer));
595 hdr->run_id = run_id_;
596 hdr->subrun_id = subrun_id_;
597 hdr->sequence_id = frag->sequenceID();
598 hdr->is_complete =
true;
// Advance past the header so the Fragment bytes land directly after it.
599 broadcasts_.IncrementWritePos(buffer,
sizeof(detail::RawEventHeader));
601 TLOG(TLVL_TRACE) <<
"broadcastFragment_ before Write calls" << TLOG_ENDL;
// frag->size() is scaled by sizeof(RawDataType) to obtain the byte count.
602 broadcasts_.Write(buffer, frag->headerAddress(), frag->size() *
sizeof(RawDataType));
604 broadcasts_.MarkBufferFull(buffer, -1);
609 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(
int buffer)
611 return reinterpret_cast<detail::RawEventHeader*
>(GetBufferStart(buffer));
/// Find the manager-owned buffer whose event header carries seqID, optionally claiming and
/// initialising a new buffer when none does.
///
/// @param seqID      Sequence ID to look up.
/// @param create_new When false, a lookup miss returns -1 instead of claiming a buffer.
/// @param timestamp  Passed to requests_.AddRequest for a newly claimed buffer unless it
///                   equals Fragment::InvalidTimestamp.
/// @return -1 when nothing is found (create_new == false) or no buffer can be claimed.
///
/// NOTE(review): the embedded numbering of this listing skips lines (e.g. 625-628, 632-635,
/// 664), so the success-path return statements, the body tail of the
/// released_incomplete_events_ branch, the matching #endif and all braces are not visible.
614 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID,
bool create_new, Fragment::timestamp_t timestamp)
// Serialises lookup/creation; the same lock object is handed to check_pending_buffers_ below.
616 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
617 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" BEGIN" << TLOG_ENDL;
// Pass 1: look for an existing buffer already holding this sequence ID.
618 auto buffers = GetBuffersOwnedByManager();
619 for (
auto& buf : buffers)
621 auto hdr = getEventHeader_(buf);
622 if (hdr->sequence_id == seqID)
624 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning " << buf << TLOG_ENDL;
// Compiled only when ART_SUPPORTS_DUPLICATE_EVENTS evaluates to 0 / is undefined.
629 #if !ART_SUPPORTS_DUPLICATE_EVENTS
630 if (released_incomplete_events_.count(seqID)) {
631 TLOG(TLVL_ERROR) <<
"Buffer has already been marked \"Incomplete\" and sent to art!" << TLOG_ENDL;
636 if (!create_new)
return -1;
// Pass 2: run the pending-buffer bookkeeping, then try to claim a buffer - first without
// overwrite, and only if that fails with the configured overwrite_mode_.
638 check_pending_buffers_(lk);
639 int new_buffer = GetBufferForWriting(
false);
641 if (new_buffer == -1)
643 new_buffer = GetBufferForWriting(overwrite_mode_);
646 if (new_buffer == -1)
return -1;
// NOTE(review): this constructs an UNNAMED TraceLock temporary, which is destroyed at the end
// of this statement - buffer_mutexes_[new_buffer] is therefore not held while the header is
// initialised below. If holding it was intended, the object needs a name; check the lock
// order against callers that take the buffer mutex before sequence_id_mutex_ first.
647 TraceLock(buffer_mutexes_[new_buffer], 34,
"getBufferForSequenceID");
// Initialise the event header of the freshly claimed buffer.
648 auto hdr = getEventHeader_(new_buffer);
649 hdr->is_complete =
false;
650 hdr->run_id = run_id_;
651 hdr->subrun_id = subrun_id_;
652 hdr->sequence_id = seqID;
653 buffer_writes_pending_[new_buffer] = 0;
// Advance past the header so Fragment data is written after it.
654 IncrementWritePos(new_buffer,
sizeof(detail::RawEventHeader));
656 active_buffers_.insert(new_buffer);
658 if (timestamp != Fragment::InvalidTimestamp)
660 requests_.AddRequest(seqID, timestamp);
662 requests_.SendRequest();
663 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning newly initialized buffer " << new_buffer << TLOG_ENDL;
/// Report whether a buffer holds any data beyond its RawEventHeader.
///
/// @param buffer Buffer index; -1 is answered with true without touching any buffer.
/// @return For a buffer that reaches the end of the function: MoreDataInBuffer(buffer) after
///         the read position has been reset and advanced past the RawEventHeader.
///
/// NOTE(review): the statement executed when CheckBuffer(..., Writing) fails (listing lines
/// 671-673) is missing from this listing.
667 bool artdaq::SharedMemoryEventManager::hasFragments_(
int buffer)
669 if (buffer == -1)
return true;
670 if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
// Rewind, skip the event header, and see whether anything follows it.
674 ResetReadPos(buffer);
675 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
676 return MoreDataInBuffer(buffer);
/// Bookkeeping for a buffer whose event header may have just become complete: retire its
/// request, send one routing token, move it from active_buffers_ to pending_buffers_, and
/// run check_pending_buffers_.
///
/// @param buffer Index of the buffer to examine.
///
/// NOTE(review): braces are missing from this listing (numbering skips 683, 685, 688,
/// 692-693), so it is not visible which statements are inside the is_complete branch nor
/// where the scope of lk ends relative to the check_pending_buffers_ call.
679 void artdaq::SharedMemoryEventManager::complete_buffer_(
int buffer)
681 auto hdr = getEventHeader_(buffer);
682 if (hdr->is_complete)
684 TLOG(TLVL_DEBUG) <<
"complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) <<
"." << TLOG_ENDL;
686 requests_.RemoveRequest(hdr->sequence_id);
687 requests_.SendRoutingToken(1);
// The active/pending sets are modified under sequence_id_mutex_.
689 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
690 active_buffers_.erase(buffer);
691 pending_buffers_.insert(buffer);
// Called without an argument here, so the declaration must supply a default for `lock`.
694 check_pending_buffers_();
697 bool artdaq::SharedMemoryEventManager::bufferComparator(
int bufA,
int bufB)
699 return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
/// Bookkeeping pass over the manager-owned buffers: schedules stale active events for
/// release, releases pending events in ascending sequence-ID order, and publishes
/// buffer-occupancy metrics.
///
/// @param lock The caller's lock object; in the visible lines it is only queried with
///             owns_lock() for the trace message.
///
/// NOTE(review): this listing omits many lines (e.g. 721-722, 724-729, 754, 757-758,
/// 760-761, 764), so braces/nesting, the else-branch keyword before line 723, whatever
/// happens on line 754 when an event is released, the guard around the metric calls and
/// the declaration of `total` are not visible here.
702 void artdaq::SharedMemoryEventManager::check_pending_buffers_(std::unique_lock<std::mutex>
const& lock)
704 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ BEGIN Locked=" << std::boolalpha << lock.owns_lock() << TLOG_ENDL;
// Step 1: find buffers for which ResetBuffer() reports true and that are not already
// pending; per the warning below, an active buffer in that state is treated as stale.
706 auto buffers = GetBuffersOwnedByManager();
707 for (
auto buf : buffers)
709 if (ResetBuffer(buf) && !pending_buffers_.count(buf))
711 auto hdr = getEventHeader_(buf);
712 if (active_buffers_.count(buf))
714 TLOG(TLVL_WARNING) <<
"Active event " << std::to_string(hdr->sequence_id) <<
" is stale. Scheduling release of incomplete event to art." << TLOG_ENDL;
715 requests_.RemoveRequest(hdr->sequence_id);
716 requests_.SendRoutingToken(1);
717 active_buffers_.erase(buf);
718 pending_buffers_.insert(buf);
// Track how many Fragments are still outstanding for this released event: the first release
// records expected-minus-present; line 723 subtracts the present count from the stored value
// (presumably the else branch - the intervening lines are missing).
719 if (!released_incomplete_events_.count(hdr->sequence_id)) {
720 released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
723 released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
// Step 2: determine the lowest sequence ID among the still-active buffers.
// lowestSeqId starts at InvalidSequenceID and is only lowered by the scan.
730 Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
// NOTE(review): presumably this condition guards the scan over active_buffers_ below, i.e.
// the ordering constraint applies only while write-ready buffers exist - nesting not visible.
733 if (WriteReadyCount(
false) != 0)
735 for (
auto buf : active_buffers_)
737 auto hdr = getEventHeader_(buf);
738 TLOG(TLVL_TRACE) <<
"Buffer: " << buf <<
", SeqID: " << std::to_string(hdr->sequence_id) <<
", ACTIVE" << TLOG_ENDL;
739 if (hdr->sequence_id < lowestSeqId)
741 lowestSeqId = hdr->sequence_id;
744 TLOG(TLVL_TRACE) <<
"Lowest SeqID held: " << std::to_string(lowestSeqId) << TLOG_ENDL;
// Step 3: walk a sorted COPY of pending_buffers_, so erasing from pending_buffers_ inside
// the loop does not disturb the iteration.
747 std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
748 sorted_buffers.sort([
this](
int a,
int b) {
return bufferComparator(a, b); });
749 for (
auto buf : sorted_buffers)
751 auto hdr = getEventHeader_(buf);
// Stop at the first pending event that is newer than the lowest active sequence ID.
752 if (hdr->sequence_id > lowestSeqId)
break;
753 TLOG(TLVL_DEBUG) <<
"Releasing event " << std::to_string(hdr->sequence_id) <<
" in buffer " << buf <<
" to art." << TLOG_ENDL;
755 subrun_event_count_++;
756 pending_buffers_.erase(buf);
759 TLOG(TLVL_TRACE) <<
"check_pending_buffers_: Sending Metrics" << TLOG_ENDL;
// Step 4: occupancy metrics, as absolute counts and as percentages of `total`.
762 auto full = ReadReadyCount();
763 auto empty = WriteReadyCount(overwrite_mode_);
765 metricMan->sendMetric(
"Shared Memory Full Buffers", full,
"buffers", 2, MetricMode::LastPoint);
766 metricMan->sendMetric(
"Shared Memory Available Buffers", empty,
"buffers", 2, MetricMode::LastPoint);
767 metricMan->sendMetric(
"Shared Memory Full %", full * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
768 metricMan->sendMetric(
"Shared Memory Available %", empty * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
770 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ END" << TLOG_ENDL;
/// Broadcast the stored init Fragment, or warn that none has been received yet when
/// send_init_fragments_ is set.
///
/// NOTE(review): listing lines 778-779 and 783-785 are missing around the file-dump
/// statements below; they may include preprocessor guards, so whether
/// "receiveInitMessage_<rank>.bin" is actually written in a normal build cannot be
/// confirmed from this listing.
773 void artdaq::SharedMemoryEventManager::send_init_frag_()
775 if (init_fragment_ !=
nullptr)
777 TLOG(TLVL_TRACE) <<
"Sending init Fragment to art..." << TLOG_ENDL;
// Dumps the init Fragment's data bytes to a per-rank binary file.
780 std::string fileName =
"receiveInitMessage_" + std::to_string(my_rank) +
".bin";
781 std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
782 ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
// init_fragment_ is moved into the by-value parameter and also passed as the out-reference,
// so after this call it is empty unless broadcastFragment_ assigns something back to it.
786 broadcastFragment_(std::move(init_fragment_), init_fragment_);
787 TLOG(TLVL_TRACE) <<
"Init Fragment sent" << TLOG_ENDL;
789 else if (send_init_fragments_)
791 TLOG(TLVL_WARNING) <<
"Cannot send init fragment because I haven't yet received one!" << TLOG_ENDL;
797 if (!init_fragment_ || init_fragment_ ==
nullptr)
799 init_fragment_.swap(frag);
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void StartArt()
Start all the art processes.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not yet been set already.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
bool endOfData()
Indicate that the end of input has been reached to the art processes.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.