2 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
4 #include "artdaq-core/Core/StatisticsCollection.hh"
5 #include "artdaq-core/Utilities/TraceLock.hh"
// Name handed to the trace macros in this file: app_name followed by "_SharedMemoryEventManager".
7 #define TRACE_NAME (app_name + "_SharedMemoryEventManager").c_str()
// Trace level used below for the "obtaining/obtained ... lock" messages.
10 #define TLVL_BUFLCK 41
// Out-of-class definitions of the two class-level mutexes.
// sequence_id_mutex_ is locked in getBufferForSequenceID_, complete_buffer_ and CheckPendingBuffers;
// subrun_event_map_mutex_ is locked wherever subrun_event_map_ is read or modified.
12 std::mutex artdaq::SharedMemoryEventManager::sequence_id_mutex_;
13 std::mutex artdaq::SharedMemoryEventManager::subrun_event_map_mutex_;
// Constructor: member-initializer list followed by the constructor body.
// (The signature line is not visible in this excerpt; the body reads two parameter sets, `pset` and `art_pset`.)
//
// Base-class arguments, in order:
//   - "shared_memory_key"          (default 0xBEE70000 + getpid())
//   - "buffer_count"               (required)
//   - "max_event_size_bytes" if present, otherwise
//     "expected_fragments_per_event" * "max_fragment_size_bytes"
//   - "stale_buffer_timeout_usec"  (default: "event_queue_wait_time" [default 5] * 1000000)
//   - the negation of "broadcast_mode" (default false)
20 : SharedMemoryManager(pset.get<uint32_t>(
"shared_memory_key", 0xBEE70000 + getpid()),
21 pset.get<size_t>(
"buffer_count"),
22 pset.has_key(
"max_event_size_bytes") ? pset.get<size_t>(
"max_event_size_bytes") : pset.get<size_t>(
"expected_fragments_per_event") * pset.get<size_t>(
"max_fragment_size_bytes"),
23 pset.get<size_t>(
"stale_buffer_timeout_usec", pset.get<size_t>(
"event_queue_wait_time", 5) * 1000000),
24 !pset.get<bool>(
"broadcast_mode", false))
25 , num_art_processes_(pset.get<size_t>(
"art_analyzer_count", 1))
26 , num_fragments_per_event_(pset.get<size_t>(
"expected_fragments_per_event"))
27 , queue_size_(pset.get<size_t>(
"buffer_count"))
29 , max_subrun_event_map_length_(pset.get<size_t>(
"max_subrun_lookup_table_size", 100))
30 , update_run_ids_(pset.get<bool>(
"update_run_ids_on_new_fragment", true))
31 , use_sequence_id_for_event_number_(pset.get<bool>(
"use_sequence_id_for_event_number", true))
// overwrite_mode_ is true when art is disabled ("use_art" false), or when either
// "overwrite_mode" or "broadcast_mode" is set.
32 , overwrite_mode_(!pset.get<bool>(
"use_art", true) || pset.get<bool>(
"overwrite_mode", false) || pset.get<bool>(
"broadcast_mode", false))
33 , send_init_fragments_(pset.get<bool>(
"send_init_fragments", true))
35 , buffer_writes_pending_()
36 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
// All report/update timestamps start at construction time.
37 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
38 , last_shmem_buffer_metric_update_(std::chrono::steady_clock::now())
39 , last_backpressure_report_time_(std::chrono::steady_clock::now())
40 , last_fragment_header_write_time_(std::chrono::steady_clock::now())
42 , broadcast_timeout_ms_(pset.get<int>(
"fragment_broadcast_timeout_ms", 3000))
44 , run_incomplete_event_count_(0)
45 , subrun_event_count_(0)
46 , subrun_incomplete_event_count_(0)
47 , oversize_fragment_count_(0)
48 , maximum_oversize_fragment_count_(pset.get<int>(
"maximum_oversize_fragment_count", 1))
51 , always_restart_art_(pset.get<bool>(
"restart_crashed_art_processes", true))
52 , manual_art_(pset.get<bool>(
"manual_art", false))
53 , current_art_pset_(art_pset)
54 , minimum_art_lifetime_s_(pset.get<double>(
"minimum_art_lifetime_s", 2.0))
// NOTE(review): the default here is 1000000 us, while the broadcasts_ initializer below reads the
// same key with a default of 100000 — confirm whether the mismatch is intended.
55 , art_event_processing_time_us_(pset.get<size_t>(
"expected_art_event_processing_time_us", 1000000))
// broadcasts_ is a second shared-memory segment with its own key, buffer count and buffer size.
// Its fourth argument is "expected_art_event_processing_time_us" * "buffer_count".
59 , broadcasts_(pset.get<uint32_t>(
"broadcast_shared_memory_key", 0xCEE70000 + getpid()),
60 pset.get<size_t>(
"broadcast_buffer_count", 10),
61 pset.get<size_t>(
"broadcast_buffer_size", 0x100000),
62 pset.get<int>(
"expected_art_event_processing_time_us", 100000) * pset.get<size_t>(
"buffer_count"), false)
// --- constructor body ---
// Seed the subrun lookup table: sequence id 0 maps to subrun 1.
64 subrun_event_map_[0] = 1;
// Minimum write size for both segments is one event header plus one fragment header.
65 SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
66 broadcasts_.SetMinWriteSize(
sizeof(detail::RawEventHeader) +
sizeof(detail::RawFragmentHeader));
// With "use_art" false no art processes are configured (num_art_processes_ is forced to 0).
68 if (pset.get<
bool>(
"use_art",
true) ==
false)
70 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false";
71 num_art_processes_ = 0;
75 TLOG(TLVL_INFO) <<
"BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true";
76 TLOG(TLVL_TRACE) <<
"art_pset is " << art_pset.to_string();
78 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
// Warn when overwrite mode is combined with configured art processes.
80 if (overwrite_mode_ && num_art_processes_ > 0)
82 TLOG(TLVL_WARNING) <<
"Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!";
84 else if (overwrite_mode_)
86 TLOG(TLVL_INFO) <<
"Overwrite Mode enabled, no configured art processes at startup";
// Zero the pending-writer counter of every buffer.
89 for (
size_t ii = 0; ii < size(); ++ii)
91 buffer_writes_pending_[ii] = 0;
// Construction fails with a cet::exception if the shared memory segment is not valid.
94 if (!IsValid())
throw cet::exception(app_name +
"_SharedMemoryEventManager") <<
"Unable to attach to Shared Memory!";
96 TLOG(TLVL_TRACE) <<
"Setting Writer rank to " << my_rank;
98 TLOG(TLVL_DEBUG) <<
"Writer Rank is " << GetRank();
106 TLOG(TLVL_TRACE) <<
"END CONSTRUCTOR";
// Destructor body (signature not visible in this excerpt): if running_ is still set,
// endOfData() is called before the object goes away.
111 TLOG(TLVL_TRACE) <<
"DESTRUCTOR";
112 if (running_) endOfData();
113 TLOG(TLVL_TRACE) <<
"Destructor END";
// Copy one complete fragment into the shared-memory buffer of its event.
// @param frag     Fragment header; supplies sequence_id, timestamp and word_count.
// @param dataPtr  Start of the data to copy; frag.word_count * sizeof(RawDataType) bytes are written.
// @return false when no buffer is available (getBufferForSequenceID_ returned -1).
//         The remaining return statements are not visible in this excerpt.
116 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag,
void* dataPtr)
118 TLOG(TLVL_TRACE) <<
"AddFragment(Header, ptr) BEGIN frag.word_count=" << frag.word_count
119 <<
", sequence_id=" << frag.sequence_id;
// create_new = true: a new buffer is claimed if none holds this sequence id yet.
120 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
121 TLOG(TLVL_TRACE) <<
"Using buffer " << buffer <<
" for seqid=" << frag.sequence_id;
122 if (buffer == -1)
return false;
// NOTE(review): the condition guarding this message is not visible here; DoneWritingFragment
// treats buffer == -2 as a separate case, so presumably it is the same check — confirm.
125 TLOG(TLVL_ERROR) <<
"Dropping event because data taking has already passed this event number: " << frag.sequence_id;
// Refresh the event header's run and subrun ids.
129 auto hdr = getEventHeader_(buffer);
132 hdr->run_id = run_id_;
134 hdr->subrun_id = GetSubrunForSequenceID(frag.sequence_id);
136 TLOG(TLVL_TRACE) <<
"AddFragment before Write calls";
137 Write(buffer, dataPtr, frag.word_count *
sizeof(RawDataType));
139 TLOG(TLVL_TRACE) <<
"Checking for complete event";
// The event is complete once the expected number of fragments is present and no writer is pending.
140 auto fragmentCount = GetFragmentCount(frag.sequence_id);
141 hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
142 TLOG(TLVL_TRACE) <<
"hdr->is_complete=" << std::boolalpha << hdr->is_complete
143 <<
", fragmentCount=" << fragmentCount
144 <<
", num_fragments_per_event=" << num_fragments_per_event_
145 <<
", buffer_writes_pending_[buffer]=" << buffer_writes_pending_[buffer];
147 complete_buffer_(buffer);
148 if (requests_) requests_->SendRequest(
true);
150 TLOG(TLVL_TRACE) <<
"AddFragment END";
// Record the fragment size in bytes under FRAGMENTS_RECEIVED_STAT_KEY.
151 statsHelper_.addSample(FRAGMENTS_RECEIVED_STAT_KEY, frag.word_count *
sizeof(RawDataType));
// Add a fragment held by a FragmentPtr, retrying until it is accepted or the timeout expires.
// @param frag          Fragment to add; its header and data are read through headerAddress().
// @param timeout_usec  Retry budget in microseconds.
// @param outfrag       Receives `frag` by move (the guarding condition is not visible in this
//                      excerpt; presumably only when the add did not succeed — confirm).
// @return the status logged as `sts` (declaration and return statement not visible here).
155 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag,
size_t timeout_usec, FragmentPtr& outfrag)
157 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) BEGIN";
// The header is copied by value; `data` points at the start of the fragment (header included).
158 auto hdr = *
reinterpret_cast<detail::RawFragmentHeader*
>(frag->headerAddress());
159 auto data = frag->headerAddress();
160 auto start = std::chrono::steady_clock::now();
// Retry with a 1000 us sleep between attempts.
162 while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
164 sts = AddFragment(hdr, data);
165 if (!sts) usleep(1000);
169 outfrag = std::move(frag);
171 TLOG(TLVL_TRACE) <<
"AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts;
// WriteFragmentHeader (signature not visible in this excerpt; the name comes from the log text).
// Writes the fragment header `frag` into the buffer for its sequence id and reserves room for the
// payload. Reads `frag` and `dropIfNoBuffersAvailable`.
// The visible return statements hand back dropped_data_[frag.fragment_id]->dataBegin(), i.e. a
// scratch Fragment that absorbs data which cannot be stored in shared memory.
177 TLOG(14) <<
"WriteFragmentHeader BEGIN";
178 auto buffer = getBufferForSequenceID_(frag.sequence_id,
true, frag.timestamp);
// No buffer available and dropping is not allowed: report back-pressure at most about once per second.
182 if (buffer == -1 && !dropIfNoBuffersAvailable)
184 std::unique_lock<std::mutex> bp_lk(sequence_id_mutex_);
185 if (TimeUtils::GetElapsedTime(last_backpressure_report_time_) > 1.0)
187 TLOG(TLVL_WARNING) << app_name <<
": Back-pressure condition: All Shared Memory buffers have been full for " << TimeUtils::GetElapsedTime(last_fragment_header_write_time_) <<
" s!";
188 last_backpressure_report_time_ = std::chrono::steady_clock::now();
// Drop paths (the conditions selecting between the two messages are not visible here).
194 TLOG(TLVL_ERROR) <<
"Dropping fragment with sequence id " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
" because data taking has already passed this event.";
198 TLOG(TLVL_ERROR) <<
"Dropping fragment with sequence id " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
" because there is no room in the queue and reliable mode is off.";
// Allocate a scratch Fragment sized for the payload (word_count minus header words) so the caller
// still has somewhere to write.
200 dropped_data_[frag.fragment_id].reset(
new Fragment(frag.word_count - frag.num_words()));
202 TLOG(6) <<
"Dropping fragment with sequence id " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
" into " << (
void*)dropped_data_[frag.fragment_id]->dataBegin() <<
" sz=" << dropped_data_[frag.fragment_id]->dataSizeBytes();
203 return dropped_data_[frag.fragment_id]->dataBegin();
// A buffer was obtained: reset both back-pressure timers and register one more pending writer.
206 last_backpressure_report_time_ = std::chrono::steady_clock::now();
207 last_fragment_header_write_time_ = std::chrono::steady_clock::now();
209 buffer_writes_pending_[buffer]++;
213 metricMan->sendMetric(
"Input Fragment Rate", 1,
"Fragments/s", 1, MetricMode::Rate);
216 TLOG(TLVL_BUFLCK) <<
"WriteFragmentHeader: obtaining buffer_mutexes lock for buffer " << buffer;
218 std::unique_lock<std::mutex> lk(buffer_mutexes_[buffer]);
220 TLOG(TLVL_BUFLCK) <<
"WriteFragmentHeader: obtained buffer_mutexes lock for buffer " << buffer;
// hdrpos: where the header is about to be written; pos: write position right after the header.
223 auto hdrpos =
reinterpret_cast<RawDataType*
>(GetWritePos(buffer));
224 Write(buffer, &frag, frag.num_words() *
sizeof(RawDataType));
226 auto pos =
reinterpret_cast<RawDataType*
>(GetWritePos(buffer));
// Advance the write position past the payload area when the fragment has a payload.
227 if (frag.word_count - frag.num_words() > 0)
229 auto sts = IncrementWritePos(buffer, (frag.word_count - frag.num_words()) *
sizeof(RawDataType));
// Over-size handling: the stored header is reduced to header-only size and marked invalid, and the
// payload is redirected to a scratch Fragment. (The condition on `sts` is not visible here.)
233 reinterpret_cast<detail::RawFragmentHeader*
>(hdrpos)->word_count = frag.num_words();
234 reinterpret_cast<detail::RawFragmentHeader*
>(hdrpos)->type = Fragment::InvalidFragmentType;
235 TLOG(TLVL_ERROR) <<
"Dropping over-size fragment with sequence id " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
" because there is no room in the current buffer for this Fragment! (Keeping header)";
236 dropped_data_[frag.fragment_id].reset(
new Fragment(frag.word_count - frag.num_words()));
238 oversize_fragment_count_++;
// A limit of 0 or less disables the check.
// NOTE(review): unlike the constructor's throw, the message is passed as the exception's only
// argument rather than streamed after a category — confirm that this is intended.
240 if (maximum_oversize_fragment_count_ > 0 && oversize_fragment_count_ >= maximum_oversize_fragment_count_)
242 throw cet::exception(
"Too many over-size Fragments received! Please adjust max_event_size_bytes or max_fragment_size_bytes!");
245 TLOG(6) <<
"Dropping over-size fragment with sequence id " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
" into " << (
void*)dropped_data_[frag.fragment_id]->dataBegin();
246 return dropped_data_[frag.fragment_id]->dataBegin();
249 TLOG(14) <<
"WriteFragmentHeader END";
// DoneWritingFragment (signature not visible in this excerpt; the name comes from the log text).
// Called with the header `frag` of a fragment whose write has finished: decrements the buffer's
// pending-writer count and, when no writer remains, re-evaluates whether the event is complete.
255 TLOG(TLVL_TRACE) <<
"DoneWritingFragment BEGIN";
// create_new = false: the buffer is expected to exist already.
256 auto buffer = getBufferForSequenceID_(frag.sequence_id,
false, frag.timestamp);
// -1 is treated as a logic error (Detach is called with an error message); -2 returns silently.
257 if (buffer == -1) Detach(
true,
"SharedMemoryEventManager",
"getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
258 if (buffer == -2) {
return; }
260 statsHelper_.addSample(FRAGMENTS_RECEIVED_STAT_KEY, frag.word_count *
sizeof(RawDataType));
262 TLOG(TLVL_BUFLCK) <<
"DoneWritingFragment: obtaining buffer_mutexes lock for buffer " << buffer;
264 std::unique_lock<std::mutex> lk(buffer_mutexes_[buffer]);
266 TLOG(TLVL_BUFLCK) <<
"DoneWritingFragment: obtained buffer_mutexes lock for buffer " << buffer;
270 TLOG(TLVL_DEBUG) <<
"DoneWritingFragment: Received Fragment with sequence ID " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
" (type " << (int)frag.type <<
")";
// Refresh the event header's run and subrun ids.
271 auto hdr = getEventHeader_(buffer);
274 hdr->run_id = run_id_;
276 hdr->subrun_id = GetSubrunForSequenceID(frag.sequence_id);
278 TLOG(TLVL_TRACE) <<
"DoneWritingFragment: Updating buffer touch time";
// This writer is done; bookkeeping is left to whichever writer finishes last.
281 buffer_writes_pending_[buffer]--;
282 if (buffer_writes_pending_[buffer] != 0)
284 TLOG(TLVL_TRACE) <<
"Done writing fragment, but there's another writer. Not doing bookkeeping steps.";
287 TLOG(TLVL_TRACE) <<
"Done writing fragment, and no other writer. Doing bookkeeping steps.";
288 auto frag_count = GetFragmentCount(frag.sequence_id);
289 hdr->is_complete = frag_count == num_fragments_per_event_;
290 TLOG(TLVL_TRACE) <<
"DoneWritingFragment: Received Fragment with sequence ID " << frag.sequence_id <<
" and fragment id " << frag.fragment_id <<
", count/expected = " << frag_count <<
"/" << num_fragments_per_event_;
// For an event already released incomplete, completeness is judged against the fragment count
// recorded in released_incomplete_events_ instead of num_fragments_per_event_.
291 #if ART_SUPPORTS_DUPLICATE_EVENTS
292 if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id))
294 hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
299 complete_buffer_(buffer);
300 if (requests_) requests_->SendRequest(
true);
301 TLOG(TLVL_TRACE) <<
"DoneWritingFragment END";
// Body of the fragment counter keyed by sequence id (signature not visible; presumably
// GetFragmentCount, which other functions here call with a sequence id — confirm).
// Looks up the existing buffer for seqID (create_new = false) and delegates to GetFragmentCountInBuffer.
306 return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID,
false), type);
// Body of the per-buffer fragment counter (signature not visible; presumably
// GetFragmentCountInBuffer(buffer, type), as called from the sequence-id variant — confirm).
// Walks the fragment headers stored after the event header; the counter variable and the final
// return are not visible in this excerpt.
311 if (buffer == -1)
return 0;
// Start reading just past the event header.
312 ResetReadPos(buffer);
313 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
317 while (MoreDataInBuffer(buffer))
319 auto fragHdr =
reinterpret_cast<artdaq::detail::RawFragmentHeader*
>(GetReadPos(buffer));
// Step over the whole fragment (word_count words) before deciding whether it is counted.
320 IncrementReadPos(buffer, fragHdr->word_count *
sizeof(RawDataType));
// Passing InvalidFragmentType as `type` disables the type filter.
321 if (type != Fragment::InvalidFragmentType && fragHdr->type != type)
continue;
322 TLOG(TLVL_TRACE) <<
"Adding Fragment with size=" << fragHdr->word_count <<
" to Fragment count";
// Body of the art-process runner (signature not visible; presumably RunArt(config_file, pid),
// which StartArtProcess launches in a thread — confirm).
// Starts an art process for `config_file`, waits for it to exit, logs how it ended, and repeats
// while restart_art_ stays true. Several lines (fork, branches, braces) are missing from this excerpt.
333 auto start_time = std::chrono::steady_clock::now();
335 TLOG(TLVL_INFO) <<
"Starting art process with config file " << config_file->getFileName();
// Build writable C strings for the argv array handed to execvp.
341 char* filename =
new char[config_file->getFileName().length() + 1];
342 strcpy(filename, config_file->getFileName().c_str());
345 std::string debugArgS =
"--config-out=" + app_name +
"_art.out";
346 char* debugArg =
new char[debugArgS.length() + 1];
347 strcpy(debugArg, debugArgS.c_str());
// Two alternative argument lists, with and without the --config-out debug argument
// (the condition or preprocessor switch selecting between them is not visible here).
349 std::vector<char*> args{(
char*)
"art", (
char*)
"-c", filename, debugArg, NULL};
351 std::vector<char*> args{ (
char*)
"art", (
char*)
"-c", filename, NULL };
// Export ARTDAQ_PARTITION_NUMBER, ARTDAQ_APPLICATION_NAME and ARTDAQ_RANK for the art process.
// A failure to set any of them is only logged.
361 std::string envVarKey =
"ARTDAQ_PARTITION_NUMBER";
362 std::string envVarValue = std::to_string(GetPartitionNumber());
363 if (setenv(envVarKey.c_str(), envVarValue.c_str(), 1) != 0)
365 TLOG(TLVL_ERROR) <<
"Error setting environment variable \"" << envVarKey
366 <<
"\" in the environment of a child art process. "
367 <<
"This may result in incorrect TCP port number "
368 <<
"assignments or other issues, and data may "
369 <<
"not flow through the system correctly.";
371 envVarKey =
"ARTDAQ_APPLICATION_NAME";
372 envVarValue = app_name;
373 if (setenv(envVarKey.c_str(), envVarValue.c_str(), 1) != 0)
375 TLOG(TLVL_DEBUG) <<
"Error setting environment variable \"" << envVarKey
376 <<
"\" in the environment of a child art process. ";
378 envVarKey =
"ARTDAQ_RANK";
379 envVarValue = std::to_string(my_rank);
380 if (setenv(envVarKey.c_str(), envVarValue.c_str(), 1) != 0)
382 TLOG(TLVL_DEBUG) <<
"Error setting environment variable \"" << envVarKey
383 <<
"\" in the environment of a child art process. ";
// Replace the current process image with `art`, looked up on PATH.
386 execvp(
"art", &args[0]);
// Manual path: ask the operator to start art by hand and enter its PID
// (presumably taken when manual_art_ is set; the condition is not visible here).
395 std::cout <<
"Please run the following command in a separate terminal:" << std::endl
396 <<
"art -c " << config_file->getFileName() << std::endl
397 <<
"Then, in a third terminal, execute: \"ps aux|grep [a]rt -c " << config_file->getFileName() <<
"\" and note the PID of the art process." << std::endl
398 <<
"Finally, return to this window and enter the pid: " << std::endl;
403 TLOG(TLVL_INFO) <<
"PID of new art process is " << pid;
// Register the PID under the mutex, then block until that process exits.
405 std::unique_lock<std::mutex> lk(art_process_mutex_);
406 art_processes_.insert(pid);
409 auto sts = waitid(P_PID, pid, &status, WEXITED);
410 TLOG(TLVL_INFO) <<
"Removing PID " << pid <<
" from process list";
412 std::unique_lock<std::mutex> lk(art_process_mutex_);
413 art_processes_.erase(pid);
417 TLOG(TLVL_WARNING) <<
"Error occurred in waitid for art process " << pid <<
": " << errno <<
" (" << strerror(errno) <<
").";
419 else if (status.si_code == CLD_EXITED && status.si_status == 0)
421 TLOG(TLVL_INFO) <<
"art process " << pid <<
" exited normally, " << (restart_art_ ?
"restarting" :
"not restarting");
// Abnormal exit: a process that ran for less than minimum_art_lifetime_s_ is not restarted.
425 auto art_lifetime = TimeUtils::GetElapsedTime(start_time);
426 if (art_lifetime < minimum_art_lifetime_s_) restart_art_ =
false;
428 auto exit_type =
"exited with status code";
429 switch (status.si_code)
433 exit_type =
"was killed with signal";
// Logged as a warning when a restart follows, as an error otherwise.
440 TLOG((restart_art_ ? TLVL_WARNING : TLVL_ERROR))
441 <<
"art process " << pid <<
" " << exit_type <<
" " << status.si_status
442 << (status.si_code == CLD_DUMPED ?
" (core dumped)" :
"")
443 <<
" after running for " << std::setprecision(2) << std::fixed << art_lifetime <<
" seconds, "
444 << (restart_art_ ?
"restarting" :
"not restarting");
446 }
while (restart_art_);
// Body of the bulk art starter (signature not visible; presumably StartArt() — confirm).
// Re-arms restart_art_ from the configured policy and starts num_art_processes_ art processes
// with the current art parameter set; does nothing further when that count is 0.
451 restart_art_ = always_restart_art_;
452 if (num_art_processes_ == 0)
return;
453 for (
size_t ii = 0; ii < num_art_processes_; ++ii)
455 StartArtProcess(current_art_pset_);
// Body of StartArtProcess(pset) (signature not visible in this excerpt).
// Launches one art process in a separate thread and waits until it has both attached to shared
// memory and reported a PID. The wait is limited to 5 s unless manual_art_ is set.
// A function-local mutex serializes concurrent callers.
461 static std::mutex start_art_mutex;
462 std::unique_lock<std::mutex> lk(start_art_mutex);
464 restart_art_ = always_restart_art_;
// Attach count before launch; the new process is detected as an increase over this value.
465 auto initialCount = GetAttachedCount();
466 auto startTime = std::chrono::steady_clock::now();
// Regenerate the art configuration file only when the parameter set changed or none exists yet.
468 if (pset != current_art_pset_ || !current_art_config_file_)
470 current_art_pset_ = pset;
471 current_art_config_file_ = std::make_shared<art_config_file>(pset);
// Shared with the RunArt thread, which is expected to store the art PID in it (starts at -1).
473 std::shared_ptr<std::atomic<pid_t>> pid(
new std::atomic<pid_t>(-1));
474 boost::thread thread([&] { RunArt(current_art_config_file_, pid); });
477 auto currentCount = GetAttachedCount() - initialCount;
478 while ((currentCount < 1 || *pid <= 0) && (TimeUtils::GetElapsedTime(startTime) < 5 || manual_art_))
481 currentCount = GetAttachedCount() - initialCount;
483 if ((currentCount < 1 || *pid <= 0) && manual_art_)
// Fix: stream the PID value (*pid). Streaming the shared_ptr itself printed a pointer address.
485 TLOG(TLVL_WARNING) <<
"Manually-started art process has not connected to shared memory or has bad PID: connected:" << currentCount <<
", PID:" << *pid;
488 else if (currentCount < 1 || *pid <= 0)
490 TLOG(TLVL_WARNING) <<
"art process has not started after 5s. Check art configuration!"
491 <<
" (pid=" << *pid <<
", attachedCount=" << currentCount <<
")";
496 TLOG(TLVL_INFO) << std::setw(4) << std::fixed <<
"art initialization took "
497 << TimeUtils::GetElapsedTime(startTime) <<
" seconds.";
// Body of the art shutdown routine (signature not visible; presumably ShutdownArtProcesses(pids),
// which endOfData calls with art_processes_ — confirm).
// Visible sequence: disable restarts, ask the processes to stop, wait up to graceful_wait_ms,
// insist and wait up to int_wait_ms more, then SIGKILL whatever remains. A separate path prompts
// the operator to shut the processes down by hand.
505 restart_art_ =
false;
// check_pids prunes entries from `pids`: invalid PIDs and PIDs for which kill(pid, 0) fails.
// With print == true the surviving PIDs are written to std::cout.
509 auto check_pids = [&](
bool print) {
510 std::unique_lock<std::mutex> lk(art_process_mutex_);
511 for (
auto pid = pids.begin(); pid != pids.end();)
517 TLOG(TLVL_WARNING) <<
"Removing an invalid PID (" << *pid
518 <<
") from the shutdown list.";
519 pid = pids.erase(pid);
// Signal 0 sends nothing; kill() only reports whether the PID can be signalled.
521 else if (kill(*pid, 0) < 0)
523 pid = pids.erase(pid);
527 if (print) std::cout << *pid <<
" ";
// count_pids takes art_process_mutex_; its return expression is not visible in this excerpt.
532 auto count_pids = [&]() {
533 std::unique_lock<std::mutex> lk(art_process_mutex_);
537 if (count_pids() == 0)
539 TLOG(14) <<
"All art processes already exited, nothing to do.";
// Stage 1: polite request (the log text names SIGQUIT; the kill call itself is not visible here).
547 TLOG(TLVL_TRACE) <<
"Gently informing art processes that it is time to shut down";
548 std::unique_lock<std::mutex> lk(art_process_mutex_);
549 for (
auto pid : pids)
551 TLOG(TLVL_TRACE) <<
"Sending SIGQUIT to pid " << pid;
556 int graceful_wait_ms = 5000;
557 int int_wait_ms = 1000;
559 TLOG(TLVL_TRACE) <<
"Waiting up to " << graceful_wait_ms <<
" ms for all art processes to exit gracefully";
560 for (
int ii = 0; ii < graceful_wait_ms; ++ii)
565 if (count_pids() == 0)
567 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms.";
// Stage 2: stronger request (the signal sent in this loop is not visible here).
573 TLOG(TLVL_TRACE) <<
"Insisting that the art processes shut down";
574 std::unique_lock<std::mutex> lk(art_process_mutex_);
575 for (
auto pid : pids)
581 TLOG(TLVL_TRACE) <<
"Waiting up to " << int_wait_ms <<
" ms for all art processes to exit";
// The index continues from graceful_wait_ms so the logged value is the total time waited.
582 for (
int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
588 if (count_pids() == 0)
590 TLOG(TLVL_TRACE) <<
"All art processes exited after " << ii <<
" ms.";
// Stage 3: SIGKILL the first remaining PID on each pass until none are left.
595 TLOG(TLVL_TRACE) <<
"Killing remaning art processes with extreme prejudice";
596 while (count_pids() > 0)
599 std::unique_lock<std::mutex> lk(art_process_mutex_);
600 kill(*pids.begin(), SIGKILL);
// Manual path: prompt the operator and keep listing the PIDs that are still running.
608 std::cout <<
"Please shut down all art processes, then hit return/enter" << std::endl;
609 while (count_pids() > 0)
611 std::cout <<
"The following PIDs are running: ";
613 std::cout << std::endl;
// ReconfigureArt (signature not visible in this excerpt; the name comes from the log text).
// Reads art_pset, newRun and n_art_processes: clears the broadcast buffers, optionally swaps in a
// new art configuration and process count. The statements guarded by the first `if` and any
// restart of art are not visible here.
622 TLOG(TLVL_DEBUG) <<
"ReconfigureArt BEGIN";
623 if (restart_art_ || !always_restart_art_)
// Empty every broadcast buffer (second argument true).
627 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
629 broadcasts_.MarkBufferEmpty(ii,
true);
// A run number of 0 means "next run after the current one".
631 if (newRun == 0) newRun = run_id_ + 1;
// Regenerate the art configuration file only when the parameter set changed or none exists yet.
633 if (art_pset != current_art_pset_ || !current_art_config_file_)
635 current_art_pset_ = art_pset;
636 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
// -1 leaves the configured number of art processes unchanged.
639 if (n_art_processes != -1)
641 TLOG(TLVL_INFO) <<
"Setting number of art processes to " << n_art_processes;
642 num_art_processes_ = n_art_processes;
645 TLOG(TLVL_DEBUG) <<
"ReconfigureArt END";
// endOfData (signature not visible in this excerpt; the name comes from the log text).
// Visible sequence: flush active buffers, wait for readers to drain outstanding buffers,
// broadcast an EndOfData fragment, give art processes time to exit, shut down the rest,
// then clear all buffers and drop the request sender.
651 init_fragment_.reset(
nullptr);
652 TLOG(TLVL_DEBUG) <<
"SharedMemoryEventManager::endOfData";
// No art restarts during shutdown.
653 restart_art_ =
false;
655 size_t initialStoreSize = GetIncompleteEventCount();
656 TLOG(TLVL_DEBUG) <<
"endOfData: Flushing " << initialStoreSize
657 <<
" stale events from the SharedMemoryEventManager.";
// `counter` bounds the flush loop (its decrement is not visible in this excerpt).
658 int counter = initialStoreSize;
659 while (active_buffers_.size() > 0 && counter > 0)
661 complete_buffer_(*active_buffers_.begin());
664 TLOG(TLVL_DEBUG) <<
"endOfData: Done flushing, there are now " << GetIncompleteEventCount()
665 <<
" stale events in the SharedMemoryEventManager.";
// "Outstanding" = buffers ready to be read plus buffers not available for writing.
667 TLOG(TLVL_DEBUG) <<
"Waiting for " << (ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) <<
" outstanding buffers...";
668 auto start = std::chrono::steady_clock::now();
669 auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
// Total wait budget: one expected art processing time per outstanding buffer (at least one).
670 auto end_of_data_wait_us = art_event_processing_time_us_ * (lastReadCount > 0 ? lastReadCount : 1);
// Polling interval: the expected processing time, capped at 100000 us.
672 auto outstanding_buffer_wait_time = art_event_processing_time_us_ > 100000 ? 100000 : art_event_processing_time_us_;
// A budget of 0 means "no time limit"; the loop also ends once no art process is left.
675 while (lastReadCount > 0 && (end_of_data_wait_us == 0 || TimeUtils::GetElapsedTimeMicroseconds(start) < end_of_data_wait_us) && get_art_process_count_() > 0)
677 auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
// Any change in the outstanding count restarts the timeout clock.
678 if (temp != lastReadCount)
680 TLOG(TLVL_TRACE) <<
"Waiting for " << temp <<
" outstanding buffers...";
681 lastReadCount = temp;
682 start = std::chrono::steady_clock::now();
684 if (lastReadCount > 0)
686 TRACE(19,
"About to sleep %lu us - lastReadCount=%lu size=%lu end_of_data_wait_us=%lu", outstanding_buffer_wait_time, lastReadCount, size(), end_of_data_wait_us);
687 usleep(outstanding_buffer_wait_time);
691 TLOG(TLVL_DEBUG) <<
"endOfData: After wait for outstanding buffers. Still outstanding: " << lastReadCount <<
", time waited: "
692 << TimeUtils::GetElapsedTime(start) <<
" s / " << (end_of_data_wait_us / 1000000.0) <<
" s, art process count: " << get_art_process_count_();
694 TLOG(TLVL_DEBUG) <<
"endOfData: Broadcasting EndOfData Fragment";
695 FragmentPtr outFrag = Fragment::eodFrag(GetBufferCount());
// outFrag is passed both as the moved-in fragment and as the output reference; broadcastFragment_
// takes the fragment by value, so the output reference can hand it back.
696 bool success = broadcastFragment_(std::move(outFrag), outFrag);
// Retry path: empty the broadcast buffers and broadcast again
// (presumably taken when `success` is false; the condition is not visible here).
699 TLOG(TLVL_DEBUG) <<
"endOfData: Clearing buffers to make room for EndOfData Fragment";
700 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
702 broadcasts_.MarkBufferEmpty(ii,
true);
704 broadcastFragment_(std::move(outFrag), outFrag);
706 auto endOfDataProcessingStart = std::chrono::steady_clock::now();
708 if (get_art_process_count_() > 0)
710 TLOG(TLVL_DEBUG) <<
"Allowing " << get_art_process_count_() <<
" art processes the chance to end gracefully";
711 if (end_of_data_wait_us == 0)
713 TLOG(TLVL_DEBUG) <<
"Expected art event processing time not specified. Waiting up to 100s for art to end gracefully.";
714 end_of_data_wait_us = 100 * 1000000;
// Poll count derived from the wait budget (the per-iteration sleep is not visible here).
717 auto sleep_count = (end_of_data_wait_us / 10000) + 1;
718 for (
size_t ii = 0; ii < sleep_count; ++ii)
721 if (get_art_process_count_() == 0)
break;
// Anything still running is shut down explicitly.
725 while (get_art_process_count_() > 0)
727 TLOG(TLVL_DEBUG) <<
"There are " << get_art_process_count_() <<
" art processes remaining. Proceeding to shutdown.";
729 ShutdownArtProcesses(art_processes_);
731 TLOG(TLVL_DEBUG) <<
"It took " << TimeUtils::GetElapsedTime(endOfDataProcessingStart) <<
" s for all art processes to close after sending EndOfData Fragment";
733 ResetAttachedCount();
735 TLOG(TLVL_DEBUG) <<
"endOfData: Clearing buffers";
736 for (
size_t ii = 0; ii < size(); ++ii)
738 MarkBufferEmpty(ii,
true);
746 released_incomplete_events_.clear();
748 TLOG(TLVL_DEBUG) <<
"endOfData: Shutting down RequestSender";
749 requests_.reset(
nullptr);
751 TLOG(TLVL_DEBUG) <<
"endOfData END";
752 TLOG(TLVL_INFO) <<
"EndOfData Complete. There were " << GetLastSeenBufferID() <<
" buffers processed.";
// startRun (signature not visible in this excerpt; the name comes from the log text).
// Resets per-run state: init fragment, statistics, broadcast buffers, the subrun lookup table and
// the run event counters. The assignment of run_id_ and the creation of requests_ are not visible here.
759 init_fragment_.reset(
nullptr);
760 statsHelper_.resetStatistics();
761 TLOG(TLVL_TRACE) <<
"startRun: Clearing broadcast buffers";
762 for (
size_t ii = 0; ii < broadcasts_.size(); ++ii)
764 broadcasts_.MarkBufferEmpty(ii,
true);
// Reset the subrun lookup table to its initial state: sequence id 0 maps to subrun 1.
769 std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
770 subrun_event_map_.clear();
771 subrun_event_map_[0] = 1;
773 run_event_count_ = 0;
774 run_incomplete_event_count_ = 0;
// Pass the run number to the request sender and send a routing token carrying queue_size_.
778 requests_->SetRunNumber(static_cast<uint32_t>(run_id_));
779 requests_->SendRoutingToken(queue_size_, run_id_);
781 TLOG(TLVL_DEBUG) <<
"Starting run " << run_id_
782 <<
", max queue size = "
785 << GetLockedBufferCount();
788 metricMan->sendMetric(
"Run Number", static_cast<unsigned long>(run_id_),
"Run", 1, MetricMode::LastPoint);
// Body of the end-of-run handler (signature not visible; presumably endRun() — confirm).
// Broadcasts an EndOfRun fragment whose single payload word is my_rank, then resets the per-run
// counters and the subrun lookup table.
794 TLOG(TLVL_INFO) <<
"Ending run " << run_id_;
// Payload size: enough Fragment::value_type words to hold my_rank, rounded up.
795 FragmentPtr endOfRunFrag(
new Fragment(static_cast<size_t>(ceil(
sizeof(my_rank) /
796 static_cast<double>(
sizeof(Fragment::value_type))))));
798 TLOG(TLVL_DEBUG) <<
"Broadcasting EndOfRun Fragment";
799 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
800 *endOfRunFrag->dataBegin() = my_rank;
// The return value of the broadcast is not checked here.
801 broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
803 TLOG(TLVL_INFO) <<
"Run " << run_id_ <<
" has ended. There were " << run_event_count_ <<
" events in this run.";
804 run_event_count_ = 0;
805 run_incomplete_event_count_ = 0;
806 oversize_fragment_count_ = 0;
// Reset the subrun lookup table to its initial state: sequence id 0 maps to subrun 1.
808 std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
809 subrun_event_map_.clear();
810 subrun_event_map_[0] = 1;
// Body of rolloverSubrun(boundary, subrun) (signature not visible in this excerpt).
// Records that `subrun` begins at sequence id `boundary`.
// A boundary of 0 or Fragment::InvalidSequenceID is ignored.
818 if (boundary == 0 || boundary == Fragment::InvalidSequenceID)
return;
820 std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
822 TLOG(TLVL_INFO) <<
"Will roll over to subrun " << subrun <<
" when I reach Sequence ID " << boundary;
823 subrun_event_map_[boundary] = subrun;
// Keep the table bounded by erasing from the front until it fits max_subrun_event_map_length_.
824 while (subrun_event_map_.size() > max_subrun_event_map_length_)
826 subrun_event_map_.erase(subrun_event_map_.begin());
// Body of a rollover helper that picks its own boundary (signature not visible; presumably the
// no-argument rolloverSubrun() overload — confirm).
// Chooses a sequence id one past the largest key and a subrun one past the largest subrun in the
// table, then calls rolloverSubrun(seqID, subrun). The declaration of `subrun` is not visible here.
832 Fragment::sequence_id_t seqID = 0;
835 std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
836 for (
auto& it : subrun_event_map_)
838 if (it.first >= seqID) seqID = it.first + 1;
839 if (it.second >= subrun) subrun = it.second + 1;
842 rolloverSubrun(seqID, subrun);
// Body of the metric reporter (signature not visible; presumably sendMetrics() — confirm).
// Sends the incomplete and pending event counts, and periodically logs each active buffer's
// sequence id with its current fragment count.
849 metricMan->sendMetric(
"Incomplete Event Count", GetIncompleteEventCount(),
"events", 1, MetricMode::LastPoint);
850 metricMan->sendMetric(
"Pending Event Count", GetPendingEventCount(),
"events", 1, MetricMode::LastPoint);
// The incomplete-event report is produced only with a positive interval and at least one locked buffer.
853 if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
// Rate limit (the statement guarded by this check is not visible in this excerpt).
855 if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
858 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
859 std::ostringstream oss;
860 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
861 for (
auto& ev : active_buffers_)
863 auto hdr = getEventHeader_(ev);
864 oss << hdr->sequence_id <<
" (" << GetFragmentCount(hdr->sequence_id) <<
"), ";
866 TLOG(TLVL_DEBUG) << oss.str();
// Write `frag` into a buffer of the broadcast shared-memory segment as a complete one-fragment event.
// @param frag     Fragment to broadcast (taken by value).
// @param outFrag  Output reference; the code that assigns it is not visible in this excerpt
//                 (presumably it receives `frag` back when the broadcast fails — confirm).
// @return not visible in this excerpt; endOfData stores the result in a variable named `success`.
870 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
872 TLOG(TLVL_DEBUG) <<
"Broadcasting Fragment with seqID=" << frag->sequenceID() <<
", type " << detail::RawFragmentHeader::SystemTypeToString(frag->type()) <<
", size=" << frag->sizeBytes() <<
"B.";
873 auto buffer = broadcasts_.GetBufferForWriting(
false);
874 TLOG(TLVL_DEBUG) <<
"broadcastFragment_: after getting buffer 1st buffer=" << buffer;
// Keep asking for a broadcast buffer until one is free or broadcast_timeout_ms_ has elapsed.
875 auto start_time = std::chrono::steady_clock::now();
876 while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
879 buffer = broadcasts_.GetBufferForWriting(
false);
881 TLOG(TLVL_DEBUG) <<
"broadcastFragment_: after getting buffer w/timeout, buffer=" << buffer <<
", elapsed time=" << TimeUtils::GetElapsedTime(start_time) <<
" s.";
// Failure path (the check that guards it is not visible in this excerpt).
884 TLOG(TLVL_ERROR) <<
"Broadcast of fragment type " << frag->typeString() <<
" failed due to timeout waiting for buffer!";
889 TLOG(TLVL_DEBUG) <<
"broadcastFragment_: Filling in RawEventHeader";
// The event header sits at the start of the buffer and is filled in place.
890 auto hdr =
reinterpret_cast<detail::RawEventHeader*
>(broadcasts_.GetBufferStart(buffer));
891 hdr->run_id = run_id_;
892 hdr->subrun_id = GetSubrunForSequenceID(frag->sequenceID());
893 hdr->sequence_id = frag->sequenceID();
894 hdr->is_complete =
true;
// Skip past the header that was just filled in before writing the fragment.
895 broadcasts_.IncrementWritePos(buffer,
sizeof(detail::RawEventHeader));
897 TLOG(TLVL_DEBUG) <<
"broadcastFragment_ before Write calls";
898 broadcasts_.Write(buffer, frag->headerAddress(), frag->size() *
sizeof(RawDataType));
900 TLOG(TLVL_DEBUG) <<
"broadcastFragment_ Marking buffer full";
901 broadcasts_.MarkBufferFull(buffer, -1);
903 TLOG(TLVL_DEBUG) <<
"broadcastFragment_ Complete";
// Return the start of buffer `buffer`, reinterpreted as a RawEventHeader pointer.
// @param buffer  Buffer index; passed unchecked to GetBufferStart.
907 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(
int buffer)
909 return reinterpret_cast<detail::RawEventHeader*
>(GetBufferStart(buffer));
// Body of GetSubrunForSequenceID(seqID) (signature not visible in this excerpt).
// Walks subrun_event_map_ (boundary sequence id -> subrun) from the beginning, visiting every
// entry whose boundary is <= seqID, and logs the subrun found for seqID.
// Holds subrun_event_map_mutex_ for the whole lookup.
914 std::unique_lock<std::mutex> lk(subrun_event_map_mutex_);
916 TLOG(TLVL_TRACE) <<
"GetSubrunForSequenceID BEGIN map size = " << subrun_event_map_.size();
917 auto it = subrun_event_map_.begin();
// Fix: compare against end() before dereferencing. The previous operand order read it->first
// first, which is undefined behaviour once the iterator reaches end().
920 while (it != subrun_event_map_.end() && it->first <= seqID)
922 TLOG(TLVL_TRACE) <<
"Map has sequence ID " << it->first <<
", subrun " << it->second <<
" (looking for <= " << seqID <<
")";
927 TLOG(TLVL_DEBUG) <<
"GetSubrunForSequenceID returning subrun " << subrun <<
" for sequence ID " << seqID;
// Find the buffer that holds event `seqID`, optionally claiming and initializing a new one.
// @param seqID       Sequence id of the event.
// @param create_new  When false, no new buffer is claimed and -1 is returned if none matches.
// @param timestamp   Used as the event number when use_sequence_id_for_event_number_ is false,
//                    and passed to requests_->AddRequest when valid.
// @return a buffer index, or -1 when no buffer is available. Other return statements (including
//         the one for an existing match) are not visible in this excerpt; callers also test for -2.
// Holds sequence_id_mutex_ for the duration of the call.
931 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID,
bool create_new, Fragment::timestamp_t timestamp)
933 TLOG(14) <<
"getBufferForSequenceID " << seqID <<
" BEGIN";
934 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
936 TLOG(14) <<
"getBufferForSequenceID obtained sequence_id_mutex for seqid=" << seqID;
// Scan the buffers owned by this manager for one whose header already carries seqID.
938 auto buffers = GetBuffersOwnedByManager();
939 for (
auto& buf : buffers)
941 auto hdr = getEventHeader_(buf);
942 if (hdr->sequence_id == seqID)
944 TLOG(14) <<
"getBufferForSequenceID " << seqID <<
" returning " << buf;
// Unless duplicate events are supported, an event already released as incomplete is an error.
949 #if !ART_SUPPORTS_DUPLICATE_EVENTS
950 if (released_incomplete_events_.count(seqID))
952 TLOG(TLVL_ERROR) <<
"Event " << seqID <<
" has already been marked \"Incomplete\" and sent to art!";
957 if (!create_new)
return -1;
// Process pending buffers first, then try for a free buffer; fall back to overwrite_mode_ on failure.
959 check_pending_buffers_(lk);
960 int new_buffer = GetBufferForWriting(
false);
962 if (new_buffer == -1)
964 new_buffer = GetBufferForWriting(overwrite_mode_);
967 if (new_buffer == -1)
return -1;
968 TLOG(TLVL_BUFLCK) <<
"getBufferForSequenceID_: obtaining buffer_mutexes lock for buffer " << new_buffer;
969 std::unique_lock<std::mutex> buffer_lk(buffer_mutexes_[new_buffer]);
970 TLOG(TLVL_BUFLCK) <<
"getBufferForSequenceID_: obtained buffer_mutexes lock for buffer " << new_buffer;
// Initialize the event header of the freshly claimed buffer.
972 auto hdr = getEventHeader_(new_buffer);
973 hdr->is_complete =
false;
974 hdr->run_id = run_id_;
975 hdr->subrun_id = GetSubrunForSequenceID(seqID);
// Event number: the sequence id or the timestamp, truncated to 32 bits.
976 hdr->event_id = use_sequence_id_for_event_number_ ?
static_cast<uint32_t
>(seqID) : static_cast<uint32_t>(timestamp);
977 hdr->sequence_id = seqID;
978 buffer_writes_pending_[new_buffer] = 0;
// Move the write position past the event header so fragments are written after it.
979 IncrementWritePos(new_buffer,
sizeof(detail::RawEventHeader));
980 SetMFIteration(
"Sequence ID " + std::to_string(seqID));
982 TLOG(TLVL_BUFFER) <<
"getBufferForSequenceID placing " << new_buffer <<
" to active.";
983 active_buffers_.insert(new_buffer);
984 TLOG(TLVL_BUFFER) <<
"Buffer occupancy now (total,full,reading,empty,pending,active)=("
986 << ReadReadyCount() <<
","
987 << WriteReadyCount(
true) - WriteReadyCount(
false) - ReadReadyCount() <<
","
988 << WriteReadyCount(
false) <<
","
989 << pending_buffers_.size() <<
","
990 << active_buffers_.size() <<
")";
// Register a data request for this event when its timestamp is valid.
994 if (timestamp != Fragment::InvalidTimestamp)
996 requests_->AddRequest(seqID, timestamp);
1002 requests_->SendRequest();
1005 TLOG(14) <<
"getBufferForSequenceID " << seqID <<
" returning newly initialized buffer " << new_buffer;
// Report whether `buffer` contains any data beyond its event header.
// @param buffer  Buffer index; -1 is answered with true.
// @return true when data follows the event header. The result taken when the CheckBuffer test
//         fails is not visible in this excerpt.
1009 bool artdaq::SharedMemoryEventManager::hasFragments_(
int buffer)
1011 if (buffer == -1)
return true;
1012 if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
// Position the read cursor just past the event header and test for remaining data.
1016 ResetReadPos(buffer);
1017 IncrementReadPos(buffer,
sizeof(detail::RawEventHeader));
1018 return MoreDataInBuffer(buffer);
// If the event in `buffer` is marked complete, move the buffer from the active set to the pending
// set and drop its data request; then run CheckPendingBuffers().
// @param buffer  Buffer index of the event to examine.
// Takes sequence_id_mutex_ while the two sets are modified. CheckPendingBuffers() locks the same
// mutex, so that lock must have been released before the final call (the closing scope is not
// visible in this excerpt).
1021 void artdaq::SharedMemoryEventManager::complete_buffer_(
int buffer)
1023 auto hdr = getEventHeader_(buffer);
1024 if (hdr->is_complete)
1026 TLOG(TLVL_DEBUG) <<
"complete_buffer_: This fragment completes event " << hdr->sequence_id <<
".";
1029 TLOG(TLVL_BUFFER) <<
"complete_buffer_ moving " << buffer <<
" from active to pending.";
1031 TLOG(TLVL_BUFLCK) <<
"complete_buffer_: obtaining sequence_id_mutex lock for seqid=" << hdr->sequence_id;
1032 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
1033 TLOG(TLVL_BUFLCK) <<
"complete_buffer_: obtained sequence_id_mutex lock for seqid=" << hdr->sequence_id;
1034 active_buffers_.erase(buffer);
1035 pending_buffers_.insert(buffer);
1037 TLOG(TLVL_BUFFER) <<
"Buffer occupancy now (total,full,reading,empty,pending,active)=("
1039 << ReadReadyCount() <<
","
1040 << WriteReadyCount(
true) - WriteReadyCount(
false) - ReadReadyCount() <<
","
1041 << WriteReadyCount(
false) <<
","
1042 << pending_buffers_.size() <<
","
1043 << active_buffers_.size() <<
")";
// The completed event no longer needs an outstanding data request.
1047 requests_->RemoveRequest(hdr->sequence_id);
1050 CheckPendingBuffers();
// Ordering predicate for buffers: returns true when the sequence_id stored in
// bufA's event header is lower than the one stored in bufB's event header.
1053 bool artdaq::SharedMemoryEventManager::bufferComparator(
int bufA,
int bufB)
1055 return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
// NOTE(review): the signature line for this body is absent from this listing; the
// log messages identify it as CheckPendingBuffers.
// Acquires sequence_id_mutex_ and hands the held lock to check_pending_buffers_.
1060 TLOG(TLVL_BUFLCK) <<
"CheckPendingBuffers: Obtaining sequence_id_mutex_";
1061 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
1062 TLOG(TLVL_BUFLCK) <<
"CheckPendingBuffers: Obtained sequence_id_mutex_";
1063 check_pending_buffers_(lk);
// Visible stages, in order:
//   1. scan the buffers owned by the manager and move flagged active buffers to
//      the pending set, counting them as incomplete events;
//   2. release every pending buffer in ascending sequence-ID order via MarkBufferFull;
//   3. send routing tokens while more buffers are writable than tokens outstanding;
//   4. log the statistics string when statsHelper_ is ready, and send metrics when
//      more than 500 ms have passed since the last metric update.
// `lock` is only read for its owns_lock() state in the lines visible here.
// NOTE(review): brace-only lines and some statements are absent from this listing,
// so block nesting below cannot be confirmed from this text alone.
1066 void artdaq::SharedMemoryEventManager::check_pending_buffers_(std::unique_lock<std::mutex>
const& lock)
1068 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ BEGIN Locked=" << std::boolalpha << lock.owns_lock();
// Stage 1: look at every buffer owned by the manager.
1070 auto buffers = GetBuffersOwnedByManager();
1071 for (
auto buf : buffers)
// Only buffers for which ResetBuffer returns true and which are not already
// pending are considered. NOTE(review): presumably ResetBuffer flags buffers that
// have timed out (the warning below calls them stale) - confirm.
1073 if (ResetBuffer(buf) && !pending_buffers_.count(buf))
1075 TLOG(15) <<
"check_pending_buffers_ Incomplete buffer detected, buf=" << buf <<
" active_bufers_.count(buf)=" << active_buffers_.count(buf) <<
" buffer_writes_pending_[buf]=" << buffer_writes_pending_[buf].load();
1076 auto hdr = getEventHeader_(buf);
// An active buffer is moved on only when no writes are pending for it, or when
// running_ is false.
1077 if (active_buffers_.count(buf) && (buffer_writes_pending_[buf].load() == 0 || !running_))
1081 requests_->RemoveRequest(hdr->sequence_id);
1083 TLOG(TLVL_BUFFER) <<
"check_pending_buffers_ moving buffer " << buf <<
" from active to pending";
1084 active_buffers_.erase(buf);
1085 pending_buffers_.insert(buf);
1086 TLOG(TLVL_BUFFER) <<
"Buffer occupancy now (total,full,reading,empty,pending,active)=("
1088 << ReadReadyCount() <<
","
1089 << WriteReadyCount(
true) - WriteReadyCount(
false) - ReadReadyCount() <<
","
1090 << WriteReadyCount(
false) <<
","
1091 << pending_buffers_.size() <<
","
1092 << active_buffers_.size() <<
")";
1094 run_incomplete_event_count_++;
1095 if (metricMan) metricMan->sendMetric(
"Incomplete Event Rate", 1,
"events/s", 3, MetricMode::Rate);
// Track how many fragments the event is missing: a first sighting stores
// num_fragments_per_event_ minus the fragments in the buffer.
// NOTE(review): the subtraction two lines below presumably belongs to an `else`
// branch that is not visible in this listing - confirm.
1096 if (!released_incomplete_events_.count(hdr->sequence_id))
1098 released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
1102 released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
1104 TLOG(TLVL_WARNING) <<
"Active event " << hdr->sequence_id <<
" is stale. Scheduling release of incomplete event (missing " << released_incomplete_events_[hdr->sequence_id] <<
" Fragments) to art.";
// Stage 2: copy the pending set into a list and order it by sequence ID so that
// buffers are released in ascending sequence-ID order.
1109 std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
1110 sorted_buffers.sort([
this](
int a,
int b) {
return bufferComparator(a, b); });
// Running total of BufferDataSize over the buffers released in this call.
1113 double eventSize = 0;
1114 for (
auto buf : sorted_buffers)
1116 auto hdr = getEventHeader_(buf);
1117 auto thisEventSize = BufferDataSize(buf);
1119 TLOG(TLVL_DEBUG) <<
"Releasing event " << std::to_string(hdr->sequence_id) <<
" in buffer " << buf <<
" to art, "
1120 <<
"event_size=" << thisEventSize <<
", buffer_size=" << BufferSize();
1121 statsHelper_.addSample(EVENTS_RELEASED_STAT_KEY, thisEventSize);
1123 TLOG(TLVL_BUFFER) <<
"check_pending_buffers_ removing buffer " << buf <<
" moving from pending to full";
1124 MarkBufferFull(buf);
1127 eventSize += thisEventSize;
1128 pending_buffers_.erase(buf);
1129 TLOG(TLVL_BUFFER) <<
"Buffer occupancy now (total,full,reading,empty,pending,active)=("
1131 << ReadReadyCount() <<
","
1132 << WriteReadyCount(
true) - WriteReadyCount(
false) - ReadReadyCount() <<
","
1133 << WriteReadyCount(
false) <<
","
1134 << pending_buffers_.size() <<
","
1135 << active_buffers_.size() <<
")";
// Stage 3: routing tokens. Outstanding tokens = tokens sent minus events released
// this run; available buffers = WriteReadyCount(overwrite_mode_).
// NOTE(review): requests_ is dereferenced here without a visible null check,
// although the "Tokens sent" metric further down tests it; a guard line may be
// absent from this listing - confirm.
1140 TLOG(TLVL_TRACE) <<
"Sent tokens: " << requests_->GetSentTokenCount() <<
", Event count: " << run_event_count_;
1141 auto outstanding_tokens = requests_->GetSentTokenCount() - run_event_count_;
1142 auto available_buffers = WriteReadyCount(overwrite_mode_);
// NOTE(review): the difference is logged before the `>` check that follows; if the
// operands are unsigned the logged value wraps whenever outstanding tokens exceed
// available buffers - confirm the types.
1144 TLOG(TLVL_TRACE) <<
"check_pending_buffers_: outstanding_tokens: " << outstanding_tokens <<
", available_buffers: " << available_buffers
1145 <<
", tokens_to_send: " << available_buffers - outstanding_tokens;
1147 if (available_buffers > outstanding_tokens)
1149 auto tokens_to_send = available_buffers - outstanding_tokens;
// One routing token is sent per iteration.
// NOTE(review): the statement that decrements tokens_to_send is not visible in
// this listing - confirm it exists in the real file.
1151 while (tokens_to_send > 0)
1153 TLOG(35) <<
"check_pending_buffers_: Sending a Routing Token";
1154 requests_->SendRoutingToken(1, run_id_);
// Stage 4: statistics and metrics.
1160 if (statsHelper_.readyToReport()) {
1161 std::string statString = buildStatisticsString_();
1162 TLOG(TLVL_INFO) << statString;
// NOTE(review): `counter` is not declared on any line visible in this listing;
// presumably it counts the events released in the loop above - confirm.
1165 metric_data_.event_count += counter;
1166 metric_data_.event_size += eventSize;
// Metrics are sent only when a metric manager exists and more than 500 ms have
// elapsed since last_shmem_buffer_metric_update_.
1168 if (metricMan && TimeUtils::GetElapsedTimeMilliseconds(last_shmem_buffer_metric_update_) > 500)
1170 TLOG(TLVL_TRACE) <<
"check_pending_buffers_: Sending Metrics";
1171 metricMan->sendMetric(
"Event Rate", metric_data_.event_count,
"Events/s", 1, MetricMode::Rate);
// The average is skipped when no events were accumulated, which avoids dividing by zero.
1172 if (metric_data_.event_count > 0) metricMan->sendMetric(
"Average Event Size", metric_data_.event_size / metric_data_.event_count,
"Bytes", 1, MetricMode::Average);
// Start a fresh accumulation window.
1173 metric_data_ = MetricData();
1175 metricMan->sendMetric(
"Events Released to art this run", run_event_count_,
"Events", 1, MetricMode::LastPoint);
1176 metricMan->sendMetric(
"Incomplete Events Released to art this run", run_incomplete_event_count_,
"Events", 1, MetricMode::LastPoint);
1177 if (requests_) metricMan->sendMetric(
"Tokens sent", requests_->GetSentTokenCount(),
"Tokens", 2, MetricMode::LastPoint);
// Count the buffers in each semaphore state from the buffer report.
1179 auto bufferReport = GetBufferReport();
1180 int full = std::count_if(bufferReport.begin(), bufferReport.end(), [](std::pair<int, BufferSemaphoreFlags> p) {
return p.second == BufferSemaphoreFlags::Full; });
1181 int empty = std::count_if(bufferReport.begin(), bufferReport.end(), [](std::pair<int, BufferSemaphoreFlags> p) {
return p.second == BufferSemaphoreFlags::Empty; });
1182 int writing = std::count_if(bufferReport.begin(), bufferReport.end(), [](std::pair<int, BufferSemaphoreFlags> p) {
return p.second == BufferSemaphoreFlags::Writing; });
1183 int reading = std::count_if(bufferReport.begin(), bufferReport.end(), [](std::pair<int, BufferSemaphoreFlags> p) {
return p.second == BufferSemaphoreFlags::Reading; });
1184 auto total = size();
1185 TLOG(TLVL_DEBUG) <<
"Buffer usage: full=" << full <<
", empty=" << empty <<
", writing=" << writing <<
", reading=" << reading <<
", total=" << total;
1187 metricMan->sendMetric(
"Shared Memory Full Buffers", full,
"buffers", 2, MetricMode::LastPoint);
1188 metricMan->sendMetric(
"Shared Memory Available Buffers", empty,
"buffers", 2, MetricMode::LastPoint);
// The metric named "Pending Buffers" is fed the count of buffers in the Writing state.
1189 metricMan->sendMetric(
"Shared Memory Pending Buffers", writing,
"buffers", 2, MetricMode::LastPoint);
1190 metricMan->sendMetric(
"Shared Memory Reading Buffers", reading,
"buffers", 2, MetricMode::LastPoint);
// Percentages divide by `total` cast to double; no check for total == 0 is
// visible in this listing.
1193 metricMan->sendMetric(
"Shared Memory Full %", full * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
1194 metricMan->sendMetric(
"Shared Memory Available %", empty * 100 / static_cast<double>(total),
"%", 2, MetricMode::LastPoint);
// Restart the 500 ms metric interval.
1197 last_shmem_buffer_metric_update_ = std::chrono::steady_clock::now();
1199 TLOG(TLVL_TRACE) <<
"check_pending_buffers_ END";
// Broadcasts the stored init fragment when one is held; otherwise, if
// send_init_fragments_ is set, logs a warning that none has been received yet.
1202 void artdaq::SharedMemoryEventManager::send_init_frag_()
1204 if (init_fragment_ !=
nullptr)
1206 TLOG(TLVL_INFO) <<
"Broadcasting init fragment to all art subprocesses...";
// Writes the fragment's data bytes to a binary file named
// receiveInitMessage_<my_rank>.bin.
// NOTE(review): lines are missing around this dump in the listing; it may be
// wrapped in a preprocessor conditional - confirm whether it is compiled in.
1209 std::string fileName =
"receiveInitMessage_" + std::to_string(my_rank) +
".bin";
1210 std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
1211 ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
// init_fragment_ is passed twice: moved into the first argument and also given as
// the second argument.
// NOTE(review): presumably the second parameter is a reference through which the
// fragment is handed back to init_fragment_ - confirm against the declaration of
// broadcastFragment_.
1215 broadcastFragment_(std::move(init_fragment_), init_fragment_);
1216 TLOG(TLVL_TRACE) <<
"Init Fragment sent";
1218 else if (send_init_fragments_)
1220 TLOG(TLVL_WARNING) <<
"Cannot send init fragment because I haven't yet received one!";
// NOTE(review): the signature line for this body is absent from this listing;
// presumably this is SetInitFragment, with `frag` as its parameter - confirm.
// Takes over `frag` by swapping it into init_fragment_, but only while no init
// fragment is held. Both operands of the condition appear to test for an empty
// pointer.
1226 if (!init_fragment_ || init_fragment_ ==
nullptr)
1228 init_fragment_.swap(frag);
// NOTE(review): the signature line for this body is absent from this listing; the
// log messages identify it as UpdateArtConfiguration, with `art_pset` as input.
// Stores art_pset and builds a new art_config_file from it when it differs from
// current_art_pset_ or when no config file object exists yet.
1235 TLOG(TLVL_DEBUG) <<
"UpdateArtConfiguration BEGIN";
1236 if (art_pset != current_art_pset_ || !current_art_config_file_)
1238 current_art_pset_ = art_pset;
1239 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
1241 TLOG(TLVL_DEBUG) <<
"UpdateArtConfiguration END";
// Assembles a multi-line, human-readable statistics report: a header line with
// app_name, an event section, a fragment section, and run/subrun event counts.
// NOTE(review): the closing lines of this function (including its return
// statement) are not visible in this listing.
1244 std::string artdaq::SharedMemoryEventManager::buildStatisticsString_()
const {
1245 std::ostringstream oss;
1246 oss << app_name <<
" statistics:" << std::endl;
// Event section: emitted only when a monitored quantity is registered under
// EVENTS_RELEASED_STAT_KEY.
1248 artdaq::MonitoredQuantityPtr mqPtr =
1249 artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(EVENTS_RELEASED_STAT_KEY);
1250 if (mqPtr.get() != 0) {
1251 artdaq::MonitoredQuantityStats stats;
1252 mqPtr->getStats(stats);
// Value statistics are multiplied by sizeof(RawDataType) and divided by 1024 twice
// before being labelled MB.
// NOTE(review): presumably the samples are recorded in RawDataType words - confirm
// at the addSample call sites.
1253 oss <<
" Event statistics: " << stats.recentSampleCount <<
" events released at " << stats.recentSampleRate
1254 <<
" events/sec, effective data rate = "
1255 << (stats.recentValueRate *
sizeof(artdaq::RawDataType) / 1024.0 / 1024.0)
1256 <<
" MB/sec, monitor window = " << stats.recentDuration
1257 <<
" sec, min::max event size = " << (stats.recentValueMin *
sizeof(artdaq::RawDataType) / 1024.0 / 1024.0)
1258 <<
"::" << (stats.recentValueMax *
sizeof(artdaq::RawDataType) / 1024.0 / 1024.0) <<
" MB" << std::endl;
// Time per event is the reciprocal of the sample rate; guarded against a zero rate.
1259 if (stats.recentSampleRate > 0.0) {
1260 oss <<
" Average time per event: ";
1261 oss <<
" elapsed time = " << (1.0 / stats.recentSampleRate) <<
" sec" << std::endl;
// Fragment section: same layout, driven by FRAGMENTS_RECEIVED_STAT_KEY.
1265 mqPtr = artdaq::StatisticsCollection::getInstance().getMonitoredQuantity(FRAGMENTS_RECEIVED_STAT_KEY);
1266 if (mqPtr.get() != 0) {
1267 artdaq::MonitoredQuantityStats stats;
1268 mqPtr->getStats(stats);
1269 oss <<
" Fragment statistics: " << stats.recentSampleCount <<
" fragments received at " << stats.recentSampleRate
1270 <<
" fragments/sec, effective data rate = "
1271 << (stats.recentValueRate *
sizeof(artdaq::RawDataType) / 1024.0 / 1024.0)
1272 <<
" MB/sec, monitor window = " << stats.recentDuration
1273 <<
" sec, min::max fragment size = " << (stats.recentValueMin *
sizeof(artdaq::RawDataType) / 1024.0 / 1024.0)
1274 <<
"::" << (stats.recentValueMax *
sizeof(artdaq::RawDataType) / 1024.0 / 1024.0) <<
" MB" << std::endl;
// Total and incomplete event counters for the run and for the subrun.
1277 oss <<
" Event counts: Run -- " << run_event_count_ <<
" Total, " << run_incomplete_event_count_ <<
" Incomplete."
1278 <<
" Subrun -- " << subrun_event_count_ <<
" Total, " << subrun_incomplete_event_count_ <<
" Incomplete. "
1283 #if MESSAGEFACILITY_HEX_VERSION >= 0x20103
void RunArt(std::shared_ptr< art_config_file > config_file, std::shared_ptr< std::atomic< pid_t >> pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
void addMonitoredQuantityName(std::string const &statKey)
Add a MonitoredQuantity name to the list.
void ShutdownArtProcesses(std::set< pid_t > &pids)
Shutdown a set of art processes.
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
The SharedMemoryEventManager is a SharedMemoryManager which tracks events as they are built...
Fragment::sequence_id_t sequence_id_t
Copy Fragment::sequence_id_t into local scope.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
The RequestSender contains methods used to send data requests and Routing tokens. ...
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
void UpdateArtConfiguration(fhicl::ParameterSet art_pset)
Updates the internally-stored copy of the art configuration.
void StartArt()
Start all the art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not yet been set already.
subrun_id_t GetSubrunForSequenceID(Fragment::sequence_id_t seqID)
Get the subrun number that the given Sequence ID would be assigned to.
void rolloverSubrun()
Add a subrun transition immediately after the highest currently defined sequence ID.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
static const std::string FRAGMENTS_RECEIVED_STAT_KEY
Key for Fragments Received MonitoredQuantity.
bool createCollectors(fhicl::ParameterSet const &pset, int defaultReportIntervalFragments, double defaultReportIntervalSeconds, double defaultMonitorWindow, std::string const &primaryStatKeyName)
Create MonitoredQuantity objects for all names registered with the StatisticsHelper.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endRun()
Send an EndOfRunFragment to the art thread.
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
static const std::string EVENTS_RELEASED_STAT_KEY
Key for the Events Released MonitoredQuantity.
bool endOfData()
Indicate that the end of input has been reached to the art processes.
RawEvent::subrun_id_t subrun_id_t
Copy RawEvent::subrun_id_t into local scope.
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.
void CheckPendingBuffers()
Check for buffers which are ready to be marked incomplete and released to art and issue tokens for an...