2 #define TRACE_NAME (app_name + "_SharedMemoryEventManager").c_str()
4 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
5 #include "artdaq-core/Core/StatisticsCollection.hh"
6 #include "artdaq-core/Utilities/TraceLock.hh"
9 std::mutex artdaq::SharedMemoryEventManager::sequence_id_mutex_;
// Constructor body of SharedMemoryEventManager (the signature line is not part of this listing).
// Configures the SharedMemoryManager base class and the data members from `pset`,
// logs how art is configured, and throws cet::exception if IsValid() is false.
// The base-class shared memory key defaults to 0xBEE70000 + getpid().
12 : SharedMemoryManager(pset.get<uint32_t>(
"shared_memory_key", 0xBEE70000 + getpid()),
13 pset.get<size_t>(
"buffer_count"),
// Third argument: "max_event_size_bytes" when that key exists, otherwise
// expected_fragments_per_event * max_fragment_size_bytes.
14 pset.has_key(
"max_event_size_bytes") ? pset.get<size_t>(
"max_event_size_bytes") : pset.get<size_t>(
"expected_fragments_per_event") * pset.get<size_t>(
"max_fragment_size_bytes"),
// "stale_buffer_timeout_usec" defaults to event_queue_wait_time (default 5) * 1000000.
15 pset.get<size_t>(
"stale_buffer_timeout_usec", pset.get<size_t>(
"event_queue_wait_time", 5) * 1000000),
// Last base-class argument is the negation of "broadcast_mode" (default false).
16 !pset.get<bool>(
"broadcast_mode", false))
17 , num_art_processes_(pset.get<size_t>(
"art_analyzer_count", 1))
18 , num_fragments_per_event_(pset.get<size_t>(
"expected_fragments_per_event"))
19 , queue_size_(pset.get<size_t>(
"buffer_count"))
22 , update_run_ids_(pset.get<bool>(
"update_run_ids_on_new_fragment", true))
// Overwrite mode is enabled when use_art is false, or when either
// overwrite_mode or broadcast_mode is true.
23 , overwrite_mode_(!pset.get<bool>(
"use_art", true) || pset.get<bool>(
"overwrite_mode", false) || pset.get<bool>(
"broadcast_mode", false))
24 , send_init_fragments_(pset.get<bool>(
"send_init_fragments", true))
26 , buffer_writes_pending_()
27 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
28 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
29 , broadcast_timeout_ms_(pset.get<int>(
"fragment_broadcast_timeout_ms", 3000))
31 , run_incomplete_event_count_(0)
32 , subrun_event_count_(0)
33 , subrun_incomplete_event_count_(0)
36 , current_art_pset_(art_pset)
37 , minimum_art_lifetime_s_(pset.get<double>(
"minimum_art_lifetime_s", 2.0))
38 , art_event_processing_time_us_(pset.get<size_t>(
"expected_art_event_processing_time_us", 100000))
// Second shared memory manager, used by broadcastFragment_; its key defaults to
// 0xCEE70000 + getpid().
// NOTE(review): "expected_art_event_processing_time_us" is read as int here but as
// size_t for art_event_processing_time_us_ above - confirm the difference is intended.
41 , broadcasts_(pset.get<uint32_t>(
"broadcast_shared_memory_key", 0xCEE70000 + getpid()),
42 pset.get<size_t>(
"broadcast_buffer_count", 10),
43 pset.get<size_t>(
"broadcast_buffer_size", 0x100000),
44 pset.get<int>(
"expected_art_event_processing_time_us", 100000) * pset.get<size_t>(
"buffer_count"), false)
// Set the minimum write size of both managers to the size of one RawEventHeader
// plus one RawFragmentHeader.
46 SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
47 broadcasts_.SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
// With use_art == false no art processes are configured.
49 if (pset.get<
bool>(
"use_art",
true) ==
false) {
50 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false";
51 num_art_processes_ = 0;
// Logged for the use_art:true case (the line introducing this branch is not part of this listing).
54 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true";
55 TLOG(TLVL_TRACE) <<
"art_pset is " << art_pset.to_string();
57 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
// Overwrite mode together with configured art processes only produces a warning.
59 if (overwrite_mode_ && num_art_processes_ > 0)
61 TLOG(TLVL_WARNING) <<
"Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!";
63 else if (overwrite_mode_)
65 TLOG(TLVL_INFO) <<
"Overwrite Mode enabled, no configured art processes at startup";
// Zero the pending-write counter for every buffer index below size().
68 for (
size_t ii = 0; ii < size(); ++ii)
70 buffer_writes_pending_[ii] = 0;
// Construction fails with cet::exception when the shared memory is not valid.
73 if (!IsValid())
throw cet::exception(app_name +
"_SharedMemoryEventManager") <<
"Unable to attach to Shared Memory!";
75 TLOG(TLVL_TRACE) <<
"Setting Writer rank to " << my_rank;
77 TLOG(TLVL_DEBUG) <<
"Writer Rank is " << GetRank();
80 TLOG(TLVL_TRACE) <<
"END CONSTRUCTOR";
// Destructor body (signature line not part of this listing):
// calls endOfData() if running_ is still set, with trace messages before and after.
85 TLOG(TLVL_TRACE) <<
"DESTRUCTOR";
86 if (running_) endOfData();
87 TLOG(TLVL_TRACE) <<
"Destructor END";
// Copies one Fragment into the shared memory buffer selected by frag.sequence_id.
// @param frag    Fragment header; sequence_id and timestamp are passed to
//                getBufferForSequenceID_ and word_count determines the copy size.
// @param dataPtr Start of the memory to copy; frag.word_count * sizeof(RawDataType)
//                bytes are written from this address.
// @return false when getBufferForSequenceID_ returns -1 (other return paths are
//         on lines that are not part of this listing).
90 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag,
void* dataPtr)
92 TLOG(TLVL_TRACE) <<
"AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
93 <<
", sequence_id=" << std::to_string(frag.sequence_id);
// create_new == true: a buffer is allocated if none holds this sequence id yet.
94 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
95 TLOG(TLVL_TRACE) <<
"Using buffer " << std::to_string(buffer);
96 if (buffer == -1)
return false;
// Drop message (the condition guarding it is not part of this listing).
99 TLOG(TLVL_ERROR) <<
"Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id);
103 auto hdr = getEventHeader_(buffer);
// Copy the current run and subrun ids into the event header.
106 hdr->run_id = run_id_;
107 hdr->subrun_id = subrun_id_;
110 TLOG(TLVL_TRACE) <<
"AddFragment before Write calls";
111 Write(buffer, dataPtr, frag.word_count *
sizeof(RawDataType));
113 TLOG(TLVL_TRACE) <<
"Checking for complete event";
// The event is complete once the expected number of Fragments is present and no
// writes to this buffer are pending.
114 auto fragmentCount = GetFragmentCount(frag.sequence_id);
115 hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
116 TLOG(TLVL_TRACE) <<
"hdr->is_complete=" << std::boolalpha << hdr->is_complete
117 <<
", fragmentCount=" << std::to_string(fragmentCount)
118 <<
", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
119 <<
", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]);
// complete_buffer_ itself checks hdr->is_complete before acting on the buffer.
121 complete_buffer_(buffer);
122 if (requests_) requests_->SendRequest(
true);
124 TLOG(TLVL_TRACE) <<
"AddFragment END";
// Adds a whole Fragment, retrying AddFragment(header, ptr) until it succeeds or
// timeout_usec microseconds have elapsed.
// @param frag         Fragment to add; its header is copied and its header address
//                     is used as the data pointer.
// @param timeout_usec Maximum time to keep retrying, in microseconds.
// @param outfrag      Receives `frag` via std::move (the condition guarding that
//                     assignment is not part of this listing; presumably on failure).
128 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag,
size_t timeout_usec, FragmentPtr& outfrag)
130 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) BEGIN";
// Copy of the Fragment's header, taken by value.
131 auto hdr = *
reinterpret_cast<detail::RawFragmentHeader*
>(frag->headerAddress());
132 auto data = frag->headerAddress();
133 auto start = std::chrono::steady_clock::now();
// Retry loop: sleeps 1000 us after each unsuccessful attempt.
135 while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
137 sts = AddFragment(hdr, data);
138 if (!sts) usleep(1000);
142 outfrag = std::move(frag);
144 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts;
// Body of WriteFragmentHeader (signature line not part of this listing).
// Writes the Fragment header into the buffer for frag.sequence_id and reserves
// space after it for the remaining frag.word_count - frag.num_words() words.
// Returns nullptr when no buffer is available and dropIfNoBuffersAvailable is false.
150 TLOG(14) <<
"WriteFragmentHeader BEGIN";
151 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
155 if (buffer == -1 && !dropIfNoBuffersAvailable)
return nullptr;
// Two drop messages follow; the conditions selecting between them are not part of this listing.
158 TLOG(TLVL_ERROR) <<
"Dropping fragment with sequence id " << std::to_string(frag.sequence_id) <<
" and fragment id " << std::to_string(frag.fragment_id) <<
" because data taking has already passed this event.";
162 TLOG(TLVL_ERROR) <<
"Dropping fragment with sequence id " << std::to_string(frag.sequence_id) <<
" and fragment id " << std::to_string(frag.fragment_id) <<
" because there is no room in the queue and reliable mode is off.";
// A scratch Fragment sized for the payload words is stored in dropped_data_ and its
// data area is returned, so the caller has somewhere to write the dropped payload.
164 dropped_data_.reset(
new Fragment(frag.word_count - frag.num_words()));
165 return dropped_data_->dataBegin();
170 metricMan->sendMetric(
"Input Fragment Rate", 1,
"Fragments/s", 1, MetricMode::Rate);
// The pending-write counter is incremented here and decremented in DoneWritingFragment.
173 buffer_writes_pending_[buffer]++;
174 TraceLock lk(buffer_mutexes_[buffer], 50,
"WriteFragmentHeader");
// Write the header words only.
175 Write(buffer, &frag, frag.num_words() *
sizeof(RawDataType));
// Write position immediately after the header.
177 auto pos =
reinterpret_cast<RawDataType*
>(GetWritePos(buffer));
// Advance the write position past the space reserved for the payload.
178 if (frag.word_count - frag.num_words() > 0) {
179 IncrementWritePos(buffer, (frag.word_count - frag.num_words()) *
sizeof(RawDataType));
182 TLOG(14) <<
"WriteFragmentHeader END";
// Body of DoneWritingFragment (signature line not part of this listing).
// Decrements the pending-write counter for the Fragment's buffer and, when the
// bookkeeping steps run, updates hdr->is_complete and calls complete_buffer_.
189 TLOG(TLVL_TRACE) <<
"DoneWritingFragment BEGIN";
// create_new == false: only an existing buffer is looked up.
190 auto buffer = getBufferForSequenceID_(frag.sequence_id,
false, frag.timestamp);
// -1 is treated as a logic error and reported through Detach.
191 if (buffer == -1) Detach(
true,
"SharedMemoryEventManager",
"getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
// -2 ends processing without touching any buffer.
192 if (buffer == -2)
return;
193 TraceLock lk(buffer_mutexes_[buffer], 50,
"DoneWritingFragment");
195 auto hdr = getEventHeader_(buffer);
// Copy the current run and subrun ids into the event header.
198 hdr->run_id = run_id_;
199 hdr->subrun_id = subrun_id_;
202 buffer_writes_pending_[buffer]--;
// Another writer is still active on this buffer when the counter is non-zero.
203 if (buffer_writes_pending_[buffer] != 0)
205 TLOG(TLVL_TRACE) <<
"Done writing fragment, but there's another writer. Not doing bookkeeping steps.";
208 auto frag_count = GetFragmentCount(frag.sequence_id);
209 hdr->is_complete = frag_count == num_fragments_per_event_;
// With ART_SUPPORTS_DUPLICATE_EVENTS, an event already recorded in
// released_incomplete_events_ is complete once frag_count equals the recorded value
// and no writes are pending.
210 #if ART_SUPPORTS_DUPLICATE_EVENTS
211 if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
212 hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
216 complete_buffer_(buffer);
217 if (requests_) requests_->SendRequest(
true);
218 TLOG(TLVL_TRACE) <<
"DoneWritingFragment END";
// Body of GetFragmentCount: looks up the buffer for seqID without creating one
// and delegates the counting to GetFragmentCountInBuffer.
223 return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID,
false), type);
// Body of GetFragmentCountInBuffer (signature line not part of this listing).
// Walks the Fragments stored in `buffer`; returns 0 when buffer is -1.
228 if (buffer == -1)
return 0;
// Start reading just after the RawEventHeader at the beginning of the buffer.
229 ResetReadPos(buffer);
230 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
234 while (MoreDataInBuffer(buffer))
236 auto fragHdr =
reinterpret_cast<artdaq::detail::RawFragmentHeader*
>(GetReadPos(buffer));
// Step to the next Fragment using the size recorded in this Fragment's header.
237 IncrementReadPos(buffer, fragHdr->word_count *
sizeof(RawDataType));
// Fragments of a different type are skipped unless type is InvalidFragmentType.
238 if (type != Fragment::InvalidFragmentType && fragHdr->type != type)
continue;
239 TLOG(TLVL_TRACE) <<
"Adding Fragment with size=" << std::to_string(fragHdr->word_count) <<
" to Fragment count";
// Body of RunArt (signature line not part of this listing).
// Launches "art -c <config file>" with execvp, records the new pid in art_processes_,
// waits for the process with waitid, and logs how it exited. The fork and the
// enclosing restart loop are on lines that are not part of this listing.
250 auto start_time = std::chrono::steady_clock::now();
252 TLOG(TLVL_INFO) <<
"Starting art process with config file " << config_file->getFileName();
// NULL-terminated argv for execvp.
// NOTE(review): the third entry points into the result of getFileName(); if that
// returns by value the pointer would dangle - confirm it returns a reference.
253 std::vector<char*> args{ (
char*)
"art", (
char*)
"-c", &config_file->getFileName()[0], NULL };
258 execvp(
"art", &args[0]);
263 TLOG(TLVL_INFO) <<
"PID of new art process is " << pid;
264 art_processes_.insert(pid);
// Wait until this specific child has exited.
266 auto sts = waitid(P_PID, pid, &status, WEXITED);
267 TLOG(TLVL_INFO) <<
"Removing PID " << pid <<
" from process list";
268 art_processes_.erase(pid);
270 TLOG(TLVL_WARNING) <<
"Error occurred in waitid for art process " << pid <<
": " << errno <<
" (" << strerror(errno) <<
").";
// Normal exit: exited with status 0.
272 else if (status.si_code == CLD_EXITED && status.si_status == 0)
274 TLOG(TLVL_INFO) <<
"art process " << pid <<
" exited normally, " << (restart_art_ ?
"restarting" :
"not restarting");
// Restarting is switched off when art ran for less than minimum_art_lifetime_s_.
278 auto art_lifetime = TimeUtils::GetElapsedTime(start_time);
279 if (art_lifetime < minimum_art_lifetime_s_) restart_art_ =
false;
// Text describing the exit; the case labels of the switch are not part of this listing.
281 auto exit_type =
"exited with status code";
282 switch (status.si_code) {
285 exit_type =
"was killed with signal";
// Logged as a warning when art will be restarted, as an error otherwise.
292 TLOG((restart_art_ ? TLVL_WARNING : TLVL_ERROR))
293 <<
"art process " << pid <<
" " << exit_type <<
" " << status.si_status
294 << (status.si_code == CLD_DUMPED ?
" (core dumped)" :
"")
295 <<
" after " << std::setprecision(2) << art_lifetime <<
" seconds, "
296 << (restart_art_ ?
"restarting" :
"not restarting");
// Body of StartArt: starts num_art_processes_ art processes using current_art_pset_;
// returns immediately when no art processes are configured.
304 if (num_art_processes_ == 0)
return;
305 for (
size_t ii = 0; ii < num_art_processes_; ++ii)
307 StartArtProcess(current_art_pset_);
// Body of StartArtProcess (signature line not part of this listing).
// Starts RunArt on a new thread and waits up to 5 seconds for the attached count
// to increase and for pid to become positive.
// A function-local static mutex serializes concurrent calls.
313 static std::mutex start_art_mutex;
314 TraceLock lk(start_art_mutex, 15,
"StartArtLock");
316 auto initialCount = GetAttachedCount();
317 auto startTime = std::chrono::steady_clock::now();
// Rebuild the art configuration file when the pset changed or no file exists yet.
319 if (pset != current_art_pset_ || !current_art_config_file_)
321 current_art_pset_ = pset;
322 current_art_config_file_ = std::make_shared<art_config_file>(pset);
// NOTE(review): the lambda captures by reference ([&]), including pid; confirm that
// pid outlives the thread, in particular when the 5 s wait below times out.
325 boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
329 while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
330 && TimeUtils::GetElapsedTime(startTime) < 5)
// Same condition re-checked after the wait: true means the wait timed out.
334 if (GetAttachedCount() - initialCount < 1 || pid <= 0)
336 TLOG(TLVL_WARNING) <<
"art process has not started after 5s. Check art configuration!"
337 <<
" (pid=" << pid <<
", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) <<
")";
342 TLOG(TLVL_INFO) << std::setw(4) << std::fixed <<
"art initialization took "
343 << TimeUtils::GetElapsedTime(startTime) <<
" seconds.";
// Body of ShutdownArtProcesses (signature line not part of this listing).
// Stops the processes in `pids` in escalating stages: a "gentle" request, an
// "insisting" request, and finally SIGKILL. The signals sent in the first two stages
// are on lines that are not part of this listing. After each stage, pids whose
// process no longer exists are removed from the set.
// Prevent art from being restarted while it is shut down.
352 restart_art_ =
false;
// kill(pid, 0) sends no signal; a negative result is used to mean the process is gone.
356 for (
auto pid = pids.begin(); pid != pids.end();)
358 if (kill(*pid, 0) < 0)
360 pid = pids.erase(pid);
366 if (pids.size() == 0)
368 TLOG(14) <<
"All art processes already exited, nothing to do.";
// Stage 1: graceful request.
373 TLOG(TLVL_TRACE) <<
"Gently informing art processes that it is time to shut down";
374 for (
auto pid : pids)
// Iteration counts for the two waiting stages (reported as milliseconds in the logs).
379 int graceful_wait_ms = 5000;
380 int int_wait_ms = 1000;
382 TLOG(TLVL_TRACE) <<
"Waiting up to " << graceful_wait_ms <<
" ms for all art processes to exit gracefully";
383 for (
int ii = 0; ii < graceful_wait_ms; ++ii)
387 for (
auto pid = pids.begin(); pid != pids.end();)
389 if (kill(*pid, 0) < 0)
391 pid = pids.erase(pid);
397 if (pids.size() == 0)
399 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms.";
// Stage 2: stronger request.
404 TLOG(TLVL_TRACE) <<
"Insisting that the art processes shut down";
405 for (
auto pid : pids)
410 TLOG(TLVL_TRACE) <<
"Waiting up to " << int_wait_ms <<
" ms for all art processes to exit";
// ii continues from graceful_wait_ms so the logged value is the total elapsed count.
411 for (
int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
415 for (
auto pid = pids.begin(); pid != pids.end();)
417 if (kill(*pid, 0) < 0)
419 pid = pids.erase(pid);
426 if (pids.size() == 0)
428 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms.";
// Stage 3: SIGKILL the first remaining pid, then prune exited pids, until none remain.
433 TLOG(TLVL_TRACE) <<
"Killing remaning art processes with extreme prejudice";
434 while (pids.size() > 0)
436 kill(*pids.begin(), SIGKILL);
439 for (
auto pid = pids.begin(); pid != pids.end();)
441 if (kill(*pid, 0) < 0)
443 pid = pids.erase(pid);
// Body of ReconfigureArt (signature line not part of this listing).
// Empties the broadcast buffers, stores the new art configuration and, when
// requested, changes the number of art processes.
454 TLOG(TLVL_DEBUG) <<
"ReconfigureArt BEGIN";
// Mark every broadcast buffer empty.
459 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
461 broadcasts_.MarkBufferEmpty(ii,
true);
// newRun == 0 means "use the next run number".
463 if (newRun == 0) newRun = run_id_ + 1;
// Rebuild the art configuration file when the pset changed or no file exists yet.
465 if (art_pset != current_art_pset_ || !current_art_config_file_) {
466 current_art_pset_ = art_pset;
467 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
// n_art_processes == -1 leaves the current process count unchanged.
470 if (n_art_processes != -1)
472 TLOG(TLVL_INFO) <<
"Setting number of art processes to " << n_art_processes;
473 num_art_processes_ = n_art_processes;
476 TLOG(TLVL_DEBUG) <<
"ReconfigureArt END";
// Body of endOfData (signature line not part of this listing).
// Sequence visible here: flush active buffers, wait for outstanding buffers,
// broadcast an EndOfData Fragment, give art processes time to exit before shutting
// them down, then clear buffers and release the request sender.
481 init_fragment_.reset(
nullptr);
482 TLOG(TLVL_TRACE) <<
"SharedMemoryEventManager::endOfData";
// No art restarts from this point on.
483 restart_art_ =
false;
485 size_t initialStoreSize = GetIncompleteEventCount();
486 TLOG(TLVL_TRACE) <<
"endOfData: Flushing " << initialStoreSize
487 <<
" stale events from the SharedMemoryEventManager.";
// counter bounds the flush loop (its update is on a line that is not part of this listing).
488 int counter = initialStoreSize;
489 while (active_buffers_.size() > 0 && counter > 0)
491 complete_buffer_(*active_buffers_.begin());
494 TLOG(TLVL_TRACE) <<
"endOfData: Done flushing, there are now " << GetIncompleteEventCount()
495 <<
" stale events in the SharedMemoryEventManager.";
// Outstanding buffers = buffers ready to read + buffers that are not ready to write.
498 TLOG(TLVL_TRACE) <<
"Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) <<
" outstanding buffers...";
499 auto start = std::chrono::steady_clock::now();
500 auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
501 auto end_of_data_wait_us = art_event_processing_time_us_ * size();
// Wait while buffers are outstanding, art processes exist, and the timeout has not
// expired; end_of_data_wait_us == 0 disables the timeout.
504 while (lastReadCount > 0 && (end_of_data_wait_us == 0 || TimeUtils::GetElapsedTimeMicroseconds(start) < end_of_data_wait_us) && art_processes_.size() > 0)
506 auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
// Any change in the outstanding count restarts the timeout clock.
507 if (temp != lastReadCount)
509 TLOG(TLVL_TRACE) <<
"Waiting for " << std::to_string(temp) <<
" outstanding buffers...";
510 lastReadCount = temp;
511 start = std::chrono::steady_clock::now();
513 if (lastReadCount > 0) usleep(art_event_processing_time_us_);
515 TLOG(TLVL_TRACE) <<
"endOfData: After wait for outstanding buffers. Still outstanding: " << lastReadCount <<
", time waited: " << TimeUtils::GetElapsedTime(start) <<
" s / " << (end_of_data_wait_us / 1000000.0) <<
" s, art process count: " << art_processes_.size();
517 TLOG(TLVL_TRACE) <<
"endOfData: Broadcasting EndOfData Fragment";
518 FragmentPtr outFrag = Fragment::eodFrag(GetBufferCount());
// outFrag is passed both as the moved-from value and as the output reference.
519 bool success = broadcastFragment_(std::move(outFrag), outFrag);
// Second attempt after emptying the broadcast buffers (the condition guarding it is
// not part of this listing).
// NOTE(review): this loop is bounded by size() of this manager while it indexes
// broadcasts_ buffers; ReconfigureArt uses broadcasts_.size() for the same
// operation - confirm which bound is intended.
522 TLOG(TLVL_TRACE) <<
"endOfData: Clearing buffers to make room for EndOfData Fragment";
523 for (
size_t ii = 0; ii < size(); ++ii)
525 broadcasts_.MarkBufferEmpty(ii,
true);
527 broadcastFragment_(std::move(outFrag), outFrag);
529 auto endOfDataProcessingStart = std::chrono::steady_clock::now();
531 if (art_processes_.size() > 0)
533 TLOG(TLVL_DEBUG) <<
"Allowing " << std::to_string(art_processes_.size()) <<
" art processes the chance to end gracefully";
// Without a configured processing time, fall back to 100 * 1000000 microseconds.
534 if (end_of_data_wait_us == 0)
536 TLOG(TLVL_DEBUG) <<
"Expected art event processing time not specified. Waiting up to 100s for art to end gracefully.";
537 end_of_data_wait_us = 100 * 1000000;
// Number of polling iterations; the loop ends early once no art processes remain.
540 auto sleep_count = (end_of_data_wait_us / 10000) + 1;
541 for (
size_t ii = 0; ii < sleep_count; ++ii) {
543 if (art_processes_.size() == 0)
break;
// Shut down whatever art processes are still registered.
547 while (art_processes_.size() > 0)
549 TLOG(TLVL_DEBUG) <<
"There are " << std::to_string(art_processes_.size()) <<
" art processes remaining. Proceeding to shutdown.";
550 ShutdownArtProcesses(art_processes_);
552 TLOG(TLVL_INFO) <<
"It took " << TimeUtils::GetElapsedTime(endOfDataProcessingStart) <<
" s for all art processes to close after sending EndOfData Fragment";
554 ResetAttachedCount();
// Final cleanup: empty every buffer and forget the released incomplete events.
556 TLOG(TLVL_TRACE) <<
"endOfData: Clearing buffers";
557 for (
size_t ii = 0; ii < size(); ++ii)
559 MarkBufferEmpty(ii,
true);
561 released_incomplete_events_.clear();
563 TLOG(TLVL_TRACE) <<
"endOfData: Shutting down RequestReceiver";
564 requests_.reset(
nullptr);
566 TLOG(TLVL_TRACE) <<
"endOfData END";
567 TLOG(TLVL_INFO) <<
"EndOfData Complete. There were " << GetLastSeenBufferID() <<
" buffers processed.";
// Body of startRun (signature line and several statements are not part of this listing).
// Discards any stored init Fragment, sends queue_size_ routing tokens when a request
// sender exists, and reports the run number as a metric.
575 init_fragment_.reset(
nullptr);
580 if (requests_) requests_->SendRoutingToken(queue_size_);
581 TLOG(TLVL_DEBUG) <<
"Starting run " << run_id_
582 <<
", max queue size = "
585 << GetLockedBufferCount();
// Run and subrun are combined into one value: run + subrun / 10000.
588 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
589 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
// Part of startSubrun (the rest of the function is not part of this listing):
// reports run + subrun / 10000 as the "Run Number" metric.
598 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
599 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1, MetricMode::LastPoint);
// Body of endRun (signature line not part of this listing).
// Broadcasts an EndOfRun Fragment carrying my_rank and resets the per-run counters.
605 TLOG(TLVL_INFO) <<
"Ending run " << run_id_;
// Payload size: the number of Fragment::value_type words needed to hold my_rank, rounded up.
606 FragmentPtr endOfRunFrag(
new
607 Fragment(static_cast<size_t>
608 (ceil(
sizeof(my_rank) /
609 static_cast<double>(
sizeof(Fragment::value_type))))));
611 TLOG(TLVL_DEBUG) <<
"Broadcasting EndOfRun Fragment";
612 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
613 *endOfRunFrag->dataBegin() = my_rank;
614 broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
616 TLOG(TLVL_INFO) <<
"Run " << run_id_ <<
" has ended. There were " << run_event_count_ <<
" events in this run.";
617 run_event_count_ = 0;
618 run_incomplete_event_count_ = 0;
// Body of endSubrun (signature line not part of this listing).
// Broadcasts an EndOfSubrun Fragment carrying my_rank and resets the per-subrun counters.
624 TLOG(TLVL_INFO) <<
"Ending subrun " << subrun_id_;
// Declaration of endOfSubrunFrag (the line naming the variable is not part of this
// listing); the payload is sized to hold my_rank, rounded up to whole words.
625 std::unique_ptr<artdaq::Fragment>
627 Fragment(static_cast<size_t>
628 (ceil(
sizeof(my_rank) /
629 static_cast<double>(
sizeof(Fragment::value_type))))));
631 TLOG(TLVL_DEBUG) <<
"Broadcasting EndOfSubrun Fragment";
632 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
633 *endOfSubrunFrag->dataBegin() = my_rank;
635 broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
637 TLOG(TLVL_INFO) <<
"Subrun " << subrun_id_ <<
" in run " << run_id_ <<
" has ended. There were " << subrun_event_count_ <<
" events in this subrun.";
638 subrun_event_count_ = 0;
639 subrun_incomplete_event_count_ = 0;
// Body of sendMetrics (signature line not part of this listing).
// Sends the incomplete and pending event counts as metrics and, when enabled,
// logs a list of the incomplete events.
648 metricMan->sendMetric(
"Incomplete Event Count", GetIncompleteEventCount(),
"events", 1, MetricMode::LastPoint);
649 metricMan->sendMetric(
"Pending Event Count", GetPendingEventCount(),
"events", 1, MetricMode::LastPoint);
// The report is only produced for a positive interval and while buffers are locked.
652 if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
// Compares the time since the last report with the interval (the statement executed
// when this is true is not part of this listing; presumably an early return).
654 if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
657 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
658 std::ostringstream oss;
// Report format: "Incomplete Events (<expected>): <seqID> (<fragment count>), ..."
659 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
660 for (
auto& ev : active_buffers_)
662 auto hdr = getEventHeader_(ev);
663 oss << hdr->sequence_id <<
" (" << GetFragmentCount(hdr->sequence_id) <<
"), ";
665 TLOG(TLVL_DEBUG) << oss.str();
// Writes `frag` into a buffer of the broadcasts_ shared memory manager as a
// single-Fragment event marked complete.
// @param frag    Fragment to broadcast (taken by value).
// @param outFrag Output reference; its use is on lines that are not part of this listing.
// Waits up to broadcast_timeout_ms_ milliseconds for a free broadcast buffer.
669 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
671 TLOG(TLVL_DEBUG) <<
"Broadcasting Fragment with seqID=" << frag->sequenceID() <<
", type " << detail::RawFragmentHeader::SystemTypeToString(frag->type()) <<
", size=" << frag->sizeBytes() <<
"B.";
672 auto buffer = broadcasts_.GetBufferForWriting(
false);
673 TLOG(TLVL_DEBUG) <<
"broadcastFragment_: after getting buffer 1st buffer=" << buffer;
674 auto start_time = std::chrono::steady_clock::now();
// Keep asking for a buffer until one is returned or the timeout expires.
675 while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
678 buffer = broadcasts_.GetBufferForWriting(
false);
680 TLOG(TLVL_DEBUG) <<
"broadcastFragment_: after getting buffer w/timeout, buffer=" << buffer <<
", elapsed time=" << TimeUtils::GetElapsedTime(start_time) <<
" s.";
// Timeout message (the condition guarding it is not part of this listing).
683 TLOG(TLVL_ERROR) <<
"Broadcast of fragment type " << frag->typeString() <<
" failed due to timeout waiting for buffer!";
// The RawEventHeader sits at the start of the buffer and is filled in place.
688 TLOG(TLVL_DEBUG) <<
"broadcastFragment_: Filling in RawEventHeader";
689 auto hdr =
reinterpret_cast<detail::RawEventHeader*
>(broadcasts_.GetBufferStart(buffer));
690 hdr->run_id = run_id_;
691 hdr->subrun_id = subrun_id_;
692 hdr->sequence_id = frag->sequenceID();
693 hdr->is_complete =
true;
// Move the write position past the event header before writing the Fragment.
694 broadcasts_.IncrementWritePos(buffer,
sizeof(detail::RawEventHeader));
696 TLOG(TLVL_DEBUG) <<
"broadcastFragment_ before Write calls";
// Write the whole Fragment, starting at its header.
697 broadcasts_.Write(buffer, frag->headerAddress(), frag->size() *
sizeof(RawDataType));
699 TLOG(TLVL_DEBUG) <<
"broadcastFragment_ Marking buffer full";
700 broadcasts_.MarkBufferFull(buffer, -1);
702 TLOG(TLVL_DEBUG) <<
"broadcastFragment_ Complete";
// Returns the start of `buffer` reinterpreted as a RawEventHeader pointer.
706 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(
int buffer)
708 return reinterpret_cast<detail::RawEventHeader*
>(GetBufferStart(buffer));
// Finds the buffer whose event header carries seqID, optionally creating one.
// @param seqID      Sequence id to look up.
// @param create_new When false, -1 is returned if no buffer already holds seqID.
// @param timestamp  Passed to requests_->AddRequest unless it is InvalidTimestamp.
// @return -1 when no buffer is found and create_new is false, or when no buffer can
//         be obtained for writing (other return values are on lines that are not part
//         of this listing).
// Holds sequence_id_mutex_ for the duration of the call.
711 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID,
bool create_new, Fragment::timestamp_t timestamp)
713 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
714 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" BEGIN";
// Search the buffers owned by the manager for one already assigned to seqID.
715 auto buffers = GetBuffersOwnedByManager();
716 for (
auto& buf : buffers)
718 auto hdr = getEventHeader_(buf);
719 if (hdr->sequence_id == seqID)
721 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning " << buf;
// Without duplicate-event support, an event that was already released as incomplete
// is reported as an error.
726 #if !ART_SUPPORTS_DUPLICATE_EVENTS
727 if (released_incomplete_events_.count(seqID)) {
728 TLOG(TLVL_ERROR) <<
"Event " << std::to_string(seqID) <<
" has already been marked \"Incomplete\" and sent to art!";
733 if (!create_new)
return -1;
// Give pending buffers a chance to be released before asking for a new buffer.
735 check_pending_buffers_(lk);
// First try without overwrite, then retry with overwrite_mode_.
736 int new_buffer = GetBufferForWriting(
false);
738 if (new_buffer == -1)
740 new_buffer = GetBufferForWriting(overwrite_mode_);
743 if (new_buffer == -1)
return -1;
// NOTE(review): this constructs an unnamed temporary TraceLock, which is destroyed at
// the end of this statement, so the buffer mutex is presumably not held while the
// header is initialized below - confirm whether a named lock was intended (and check
// lock ordering against DoneWritingFragment/complete_buffer_ before changing it).
744 TraceLock(buffer_mutexes_[new_buffer], 34,
"getBufferForSequenceID");
// Initialize the event header of the new buffer.
745 auto hdr = getEventHeader_(new_buffer);
746 hdr->is_complete =
false;
747 hdr->run_id = run_id_;
748 hdr->subrun_id = subrun_id_;
749 hdr->sequence_id = seqID;
750 buffer_writes_pending_[new_buffer] = 0;
// Move the write position past the event header.
751 IncrementWritePos(new_buffer,
sizeof(detail::RawEventHeader));
752 #if ART_HEX_VERSION >= 0x21100
753 SetMFIteration(
"Sequence ID " + std::to_string(seqID));
756 active_buffers_.insert(new_buffer);
// Register a data request for this event when a valid timestamp was supplied.
759 if (timestamp != Fragment::InvalidTimestamp)
761 requests_->AddRequest(seqID, timestamp);
763 requests_->SendRequest();
765 TLOG(14) <<
"getBufferForSequenceID " << std::to_string(seqID) <<
" returning newly initialized buffer " << new_buffer;
// Reports whether `buffer` contains data after its RawEventHeader.
// Returns true for buffer == -1. The statement executed when the Writing-flag check
// fails is not part of this listing.
769 bool artdaq::SharedMemoryEventManager::hasFragments_(
int buffer)
771 if (buffer == -1)
return true;
772 if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
// Position the read pointer just after the event header and test for more data.
776 ResetReadPos(buffer);
777 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
778 return MoreDataInBuffer(buffer);
// Handles a buffer whose event header may be marked complete: the request for the
// event is removed, one routing token is sent, and the buffer moves from
// active_buffers_ to pending_buffers_. check_pending_buffers_() is then called.
// Brace lines are not part of this listing, so the exact nesting of these
// statements cannot be confirmed here.
781 void artdaq::SharedMemoryEventManager::complete_buffer_(
int buffer)
783 auto hdr = getEventHeader_(buffer);
784 if (hdr->is_complete)
786 TLOG(TLVL_DEBUG) <<
"complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) <<
".";
789 requests_->RemoveRequest(hdr->sequence_id);
790 requests_->SendRoutingToken(1);
// The buffer sets are modified while holding sequence_id_mutex_.
793 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
794 active_buffers_.erase(buffer);
795 pending_buffers_.insert(buffer);
798 check_pending_buffers_();
// Ordering predicate for buffers: true when bufA's event has a lower sequence id
// than bufB's event.
801 bool artdaq::SharedMemoryEventManager::bufferComparator(
int bufA,
int bufB)
803 return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
// Moves stale active buffers to the pending set, releases pending events in
// sequence-id order, and sends event and buffer metrics.
// @param lock Lock object supplied by the caller; only lock.owns_lock() is read, for logging.
806 void artdaq::SharedMemoryEventManager::check_pending_buffers_(std::unique_lock<std::mutex>
const& lock)
808 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ BEGIN Locked=" << std::boolalpha << lock.owns_lock();
// Pass 1: a buffer for which ResetBuffer() returns true and which is not yet pending
// is treated as stale.
810 auto buffers = GetBuffersOwnedByManager();
811 for (
auto buf : buffers)
813 if (ResetBuffer(buf) && !pending_buffers_.count(buf))
815 auto hdr = getEventHeader_(buf);
816 if (active_buffers_.count(buf))
819 requests_->RemoveRequest(hdr->sequence_id);
820 requests_->SendRoutingToken(1);
822 active_buffers_.erase(buf);
823 pending_buffers_.insert(buf);
824 subrun_incomplete_event_count_++;
825 run_incomplete_event_count_++;
826 if (metricMan) metricMan->sendMetric(
"Incomplete Event Rate", 1,
"events/s", 3, MetricMode::Rate);
// released_incomplete_events_ maps sequence id to the number of Fragments still
// missing: first release stores expected - present, later releases subtract the
// Fragments found in the buffer.
827 if (!released_incomplete_events_.count(hdr->sequence_id)) {
828 released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
831 released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
833 TLOG(TLVL_WARNING) <<
"Active event " << std::to_string(hdr->sequence_id) <<
" is stale. Scheduling release of incomplete event (missing " << released_incomplete_events_[hdr->sequence_id] <<
" Fragments) to art.";
// Pass 2: find the lowest sequence id among the active buffers. This is done only
// while WriteReadyCount(false) is non-zero; otherwise lowestSeqId keeps its initial
// value of InvalidSequenceID.
839 Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
842 if (WriteReadyCount(
false) != 0)
844 for (
auto buf : active_buffers_)
846 auto hdr = getEventHeader_(buf);
847 TLOG(TLVL_TRACE) <<
"Buffer: " << buf <<
", SeqID: " << std::to_string(hdr->sequence_id) <<
", ACTIVE";
848 if (hdr->sequence_id < lowestSeqId)
850 lowestSeqId = hdr->sequence_id;
853 TLOG(TLVL_TRACE) <<
"Lowest SeqID held: " << std::to_string(lowestSeqId);
// Pass 3: release pending buffers in ascending sequence-id order, stopping at the
// first one whose sequence id is above lowestSeqId.
856 std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
857 sorted_buffers.sort([
this](
int a,
int b) {
return bufferComparator(a, b); });
860 double eventSize = 0;
861 for (
auto buf : sorted_buffers)
863 auto hdr = getEventHeader_(buf);
864 if (hdr->sequence_id > lowestSeqId)
break;
865 TLOG(TLVL_DEBUG) <<
"Releasing event " << std::to_string(hdr->sequence_id) <<
" in buffer " << buf <<
" to art.";
867 subrun_event_count_++;
870 eventSize += BufferDataSize(buf);
871 pending_buffers_.erase(buf);
// Average size of the events released in this call.
// NOTE(review): no guard against counter == 0 is visible in this listing; in that
// case this floating-point division yields a non-finite value - confirm.
873 eventSize /= counter;
875 TLOG(TLVL_TRACE) <<
"check_pending_buffers_: Sending Metrics";
878 auto full = ReadReadyCount();
879 auto empty = WriteReadyCount(overwrite_mode_);
882 metricMan->sendMetric(
"Event Rate", counter,
"Events/s", 1, MetricMode::Rate);
883 metricMan->sendMetric(
"Events Released to art (run)", run_event_count_,
"Events", 1, MetricMode::LastPoint);
884 metricMan->sendMetric(
"Incomplete Events Released to art (run)", run_incomplete_event_count_,
"Events", 1, MetricMode::LastPoint);
885 metricMan->sendMetric(
"Events Released to art (subrun)", subrun_event_count_,
"Events", 2, MetricMode::LastPoint);
886 metricMan->sendMetric(
"Incomplete Events Released to art (subrun)", subrun_incomplete_event_count_,
"Events", 2, MetricMode::LastPoint);
887 metricMan->sendMetric(
"Event Size", eventSize,
"Bytes", 1, MetricMode::Average);
// Buffer occupancy, as counts and as percentages of `total` (declared on a line that
// is not part of this listing).
889 metricMan->sendMetric(
"Shared Memory Full Buffers", full,
"buffers", 2, MetricMode::LastPoint);
890 metricMan->sendMetric(
"Shared Memory Available Buffers", empty,
"buffers", 2, MetricMode::LastPoint);
891 metricMan->sendMetric(
"Shared Memory Full %", full * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
892 metricMan->sendMetric(
"Shared Memory Available %", empty * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
894 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ END";
// Broadcasts the stored init Fragment if there is one; otherwise, when
// send_init_fragments_ is set, warns that none has been received yet.
897 void artdaq::SharedMemoryEventManager::send_init_frag_()
899 if (init_fragment_ !=
nullptr)
901 TLOG(TLVL_TRACE) <<
"Sending init Fragment to art...";
// Writes the init Fragment's data bytes to "receiveInitMessage_<rank>.bin"
// (lines around this section are not part of this listing, so it may be compiled
// conditionally - TODO confirm).
904 std::string fileName =
"receiveInitMessage_" + std::to_string(my_rank) +
".bin";
905 std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
906 ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
// init_fragment_ is passed both as the moved-from value and as the output reference.
910 broadcastFragment_(std::move(init_fragment_), init_fragment_);
911 TLOG(TLVL_TRACE) <<
"Init Fragment sent";
913 else if (send_init_fragments_)
915 TLOG(TLVL_WARNING) <<
"Cannot send init fragment because I haven't yet received one!";
// Body of SetInitFragment: stores `frag` as the init Fragment only when none is
// currently stored, by swapping it into init_fragment_.
// NOTE(review): both halves of the condition test init_fragment_ for null, so the
// second test looks redundant - confirm.
921 if (!init_fragment_ || init_fragment_ ==
nullptr)
923 init_fragment_.swap(frag);
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
The RequestSender contains methods used to send data requests and Routing tokens. ...
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void StartArt()
Start all the art processes.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not already been set.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
bool endOfData()
Indicate that the end of input has been reached to the art processes.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.