00001 #include "artdaq/DAQrate/SharedMemoryEventManager.hh"
00002 #include "artdaq-core/Core/StatisticsCollection.hh"
00003 #include "artdaq-core/Utilities/TraceLock.hh"
00004 #include <sys/wait.h>
00005 #include "SharedMemoryEventManager.hh"
00006
00007 artdaq::SharedMemoryEventManager::SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
00008 : SharedMemoryManager(pset.get<uint32_t>("shared_memory_key", 0xBEE70000 + getpid()),
00009 pset.get<size_t>("buffer_count"),
00010 pset.has_key("max_event_size_bytes") ? pset.get<size_t>("max_event_size_bytes") : pset.get<size_t>("expected_fragments_per_event") * pset.get<size_t>("max_fragment_size_bytes"),
00011 pset.get<size_t>("stale_buffer_timeout_usec", pset.get<size_t>("event_queue_wait_time", 5) * 1000000),
00012 !pset.get<bool>("broadcast_mode", false))
00013 , num_art_processes_(pset.get<size_t>("art_analyzer_count", 1))
00014 , num_fragments_per_event_(pset.get<size_t>("expected_fragments_per_event"))
00015 , queue_size_(pset.get<size_t>("buffer_count"))
00016 , run_id_(0)
00017 , subrun_id_(0)
00018 , update_run_ids_(pset.get<bool>("update_run_ids_on_new_fragment", true))
00019 , overwrite_mode_(!pset.get<bool>("use_art", true) || pset.get<bool>("overwrite_mode", false) || pset.get<bool>("broadcast_mode", false))
00020 , send_init_fragments_(pset.get<bool>("send_init_fragments", true))
00021 , buffer_writes_pending_()
00022 , incomplete_event_report_interval_ms_(pset.get<int>("incomplete_event_report_interval_ms", -1))
00023 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
00024 , broadcast_timeout_ms_(pset.get<int>("fragment_broadcast_timeout_ms", 3000))
00025 , broadcast_count_(0)
00026 , subrun_event_count_(0)
00027 , art_processes_()
00028 , restart_art_(false)
00029 , current_art_pset_(art_pset)
00030 , requests_(pset)
00031 , broadcasts_(pset.get<uint32_t>("broadcast_shared_memory_key", 0xCEE70000 + getpid()),
00032 pset.get<size_t>("broadcast_buffer_count", 10),
00033 pset.get<size_t>("broadcast_buffer_size", 0x100000),
00034 pset.get<int>("fragment_broadcast_timeout_ms", 3000) * 1000, false)
00035 {
00036 SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
00037 broadcasts_.SetMinWriteSize(sizeof(detail::RawEventHeader) + sizeof(detail::RawFragmentHeader));
00038
00039 if (pset.get<bool>("use_art", true) == false) {
00040 TLOG_INFO("SharedMemoryEventManager") << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:false" << TLOG_ENDL;
00041 num_art_processes_ = 0;
00042 }
00043 else {
00044 TLOG_INFO("SharedMemoryEventManager") << "BEGIN SharedMemoryEventManager CONSTRUCTOR with use_art:true" << TLOG_ENDL;
00045 TLOG_TRACE("SharedMemoryEventManager") << "art_pset is " << art_pset.to_string() << TLOG_ENDL;
00046 }
00047 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
00048
00049 if (overwrite_mode_ && num_art_processes_ > 0)
00050 {
00051 TLOG_WARNING("SharedMemoryEventManager") << "Art is configured to run, but overwrite mode is enabled! Check your configuration if this in unintentional!" << TLOG_ENDL;
00052 }
00053 else if (overwrite_mode_)
00054 {
00055 TLOG_INFO("SharedMemoryEventManager") << "Overwrite Mode enabled, no configured art processes at startup" << TLOG_ENDL;
00056 }
00057
00058 for (size_t ii = 0; ii < size(); ++ii)
00059 {
00060 buffer_writes_pending_[ii] = 0;
00061 }
00062
00063 if (!IsValid()) throw cet::exception("SharedMemoryEventManager") << "Unable to attach to Shared Memory!";
00064
00065 TLOG_TRACE("SharedMemoryEventManager") << "Setting Writer rank to " << my_rank << TLOG_ENDL;
00066 SetRank(my_rank);
00067 TLOG_DEBUG("SharedMemoryEventManager") << "Writer Rank is " << GetRank() << TLOG_ENDL;
00068
00069
00070 TLOG_TRACE("SharedMemoryEventManager") << "END CONSTRUCTOR" << TLOG_ENDL;
00071 }
00072
00073 artdaq::SharedMemoryEventManager::~SharedMemoryEventManager()
00074 {
00075 TLOG_TRACE("SharedMemoryEventManager") << "DESTRUCTOR" << TLOG_ENDL;
00076 endOfData();
00077 TLOG_TRACE("SharedMemoryEventManager") << "Destructor END" << TLOG_ENDL;
00078 }
00079
00080 bool artdaq::SharedMemoryEventManager::AddFragment(detail::RawFragmentHeader frag, void* dataPtr)
00081 {
00082 TLOG_TRACE("SharedMemoryEventManager") << "AddFragment(Header, ptr) BEGIN frag.word_count=" << std::to_string(frag.word_count)
00083 << ", sequence_id=" << std::to_string(frag.sequence_id) << TLOG_ENDL;
00084 auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
00085 TLOG_TRACE("SharedMemoryEventManager") << "Using buffer " << std::to_string(buffer) << TLOG_ENDL;
00086 if (buffer == -1) return false;
00087 if (buffer == -2)
00088 {
00089 TLOG_ERROR("SharedMemoryEventManager") << "Dropping event because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
00090 return true;
00091 }
00092
00093 auto hdr = getEventHeader_(buffer);
00094 if (update_run_ids_)
00095 {
00096 hdr->run_id = run_id_;
00097 hdr->subrun_id = subrun_id_;
00098 }
00099
00100 TLOG_TRACE("SharedMemoryEventManager") << "AddFragment before Write calls" << TLOG_ENDL;
00101 Write(buffer, dataPtr, frag.word_count * sizeof(RawDataType));
00102
00103 TLOG_TRACE("SharedMemoryEventManager") << "Checking for complete event" << TLOG_ENDL;
00104 auto fragmentCount = GetFragmentCount(frag.sequence_id);
00105 hdr->is_complete = fragmentCount == num_fragments_per_event_ && buffer_writes_pending_[buffer] == 0;
00106 TLOG_TRACE("SharedMemoryEventManager") << "hdr->is_complete=" << std::boolalpha << hdr->is_complete
00107 << ", fragmentCount=" << std::to_string(fragmentCount)
00108 << ", num_fragments_per_event=" << std::to_string(num_fragments_per_event_)
00109 << ", buffer_writes_pending_[buffer]=" << std::to_string(buffer_writes_pending_[buffer]) << TLOG_ENDL;
00110
00111 complete_buffer_(buffer);
00112 requests_.SendRequest(true);
00113
00114 TLOG_TRACE("SharedMemoryEventManager") << "AddFragment END" << TLOG_ENDL;
00115 return true;
00116 }
00117
00118 bool artdaq::SharedMemoryEventManager::AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag)
00119 {
00120 TLOG_TRACE("SharedMemoryEventManager") << "AddFragment(FragmentPtr) BEGIN" << TLOG_ENDL;
00121 auto hdr = *reinterpret_cast<detail::RawFragmentHeader*>(frag->headerAddress());
00122 auto data = frag->headerAddress();
00123 auto start = std::chrono::steady_clock::now();
00124 bool sts = false;
00125 while (!sts && TimeUtils::GetElapsedTimeMicroseconds(start) < timeout_usec)
00126 {
00127 sts = AddFragment(hdr, data);
00128 if (!sts) usleep(1000);
00129 }
00130 if (!sts)
00131 {
00132 outfrag = std::move(frag);
00133 }
00134 TLOG_TRACE("SharedMemoryEventManager") << "AddFragment(FragmentPtr) RETURN " << std::boolalpha << sts << TLOG_ENDL;
00135 return sts;
00136 }
00137
00138 artdaq::RawDataType* artdaq::SharedMemoryEventManager::WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable)
00139 {
00140 TLOG_ARB(14, "SharedMemoryEventManager") << "WriteFragmentHeader BEGIN" << TLOG_ENDL;
00141 auto buffer = getBufferForSequenceID_(frag.sequence_id, true, frag.timestamp);
00142
00143 if (buffer < 0)
00144 {
00145 if (buffer == -1 && !dropIfNoBuffersAvailable) return nullptr;
00146 if (buffer == -2)
00147 {
00148 TLOG_ERROR("SharedMemoryEventManager") << "Dropping fragment because data taking has already passed this event number: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
00149 }
00150 else
00151 {
00152 TLOG_ERROR("SharedMemoryEventManager") << "Dropping fragment because there is no room in the queue and reliable mode is off: " << std::to_string(frag.sequence_id) << TLOG_ENDL;
00153 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
00154 }
00155 dropped_data_.reset(new Fragment(frag.word_count - frag.num_words()));
00156 return dropped_data_->dataBegin();
00157 }
00158
00159 buffer_writes_pending_[buffer]++;
00160 TraceLock lk(buffer_mutexes_[buffer], 50, "WriteFragmentHeader");
00161 Write(buffer, &frag, frag.num_words() * sizeof(RawDataType));
00162
00163 auto pos = reinterpret_cast<RawDataType*>(GetWritePos(buffer));
00164 if (frag.word_count - frag.num_words() > 0) {
00165 IncrementWritePos(buffer, (frag.word_count - frag.num_words()) * sizeof(RawDataType));
00166 }
00167
00168 TLOG_ARB(14, "SharedMemoryEventManager") << "WriteFragmentHeader END" << TLOG_ENDL;
00169 return pos;
00170
00171 }
00172
00173 void artdaq::SharedMemoryEventManager::DoneWritingFragment(detail::RawFragmentHeader frag)
00174 {
00175 TLOG_TRACE("SharedMemoryEventManager") << "DoneWritingFragment BEGIN" << TLOG_ENDL;
00176 auto buffer = getBufferForSequenceID_(frag.sequence_id, false, frag.timestamp);
00177 if (buffer == -1) Detach(true, "SharedMemoryEventManager", "getBufferForSequenceID_ returned -1 when it REALLY shouldn't have! Check program logic!");
00178 if (buffer == -2) return;
00179
00180 auto hdr = getEventHeader_(buffer);
00181 if (update_run_ids_)
00182 {
00183 hdr->run_id = run_id_;
00184 hdr->subrun_id = subrun_id_;
00185 }
00186
00187 buffer_writes_pending_[buffer]--;
00188 if (buffer_writes_pending_[buffer] != 0)
00189 {
00190 TLOG_TRACE("SharedMemoryEventManager") << "Done writing fragment, but there's another writer. Not doing bookkeeping steps." << TLOG_ENDL;
00191 return;
00192 }
00193 auto frag_count = GetFragmentCount(frag.sequence_id);
00194 hdr->is_complete = frag_count == num_fragments_per_event_;
00195 #if ART_SUPPORTS_DUPLICATE_EVENTS
00196 if (!hdr->is_complete && released_incomplete_events_.count(frag.sequence_id)) {
00197 hdr->is_complete = frag_count == released_incomplete_events_[frag.sequence_id] && buffer_writes_pending_[buffer] == 0;
00198 }
00199 #endif
00200
00201 complete_buffer_(buffer);
00202 requests_.SendRequest(true);
00203 TLOG_TRACE("SharedMemoryEventManager") << "DoneWritingFragment END" << TLOG_ENDL;
00204 }
00205
00206 size_t artdaq::SharedMemoryEventManager::GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type)
00207 {
00208 return GetFragmentCountInBuffer(getBufferForSequenceID_(seqID, false), type);
00209 }
00210
00211 size_t artdaq::SharedMemoryEventManager::GetFragmentCountInBuffer(int buffer, Fragment::type_t type)
00212 {
00213 if (buffer == -1) return 0;
00214 ResetReadPos(buffer);
00215 IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
00216
00217 size_t count = 0;
00218
00219 while (MoreDataInBuffer(buffer))
00220 {
00221 auto fragHdr = reinterpret_cast<artdaq::detail::RawFragmentHeader*>(GetReadPos(buffer));
00222 IncrementReadPos(buffer, fragHdr->word_count * sizeof(RawDataType));
00223 if (type != Fragment::InvalidFragmentType && fragHdr->type != type) continue;
00224 TLOG_TRACE("GetFragmentCount") << "Adding Fragment with size=" << std::to_string(fragHdr->word_count) << " to Fragment count" << TLOG_ENDL;
00225 ++count;
00226 }
00227
00228 return count;
00229 }
00230
00231 void artdaq::SharedMemoryEventManager::RunArt(std::shared_ptr<art_config_file> config_file, pid_t& pid_out)
00232 {
00233 while (restart_art_)
00234 {
00235 send_init_frag_();
00236 TLOG_INFO("SharedMemoryEventManager") << "Starting art process with config file " << config_file->getFileName() << TLOG_ENDL;
00237 std::vector<char*> args{ (char*)"art", (char*)"-c", &config_file->getFileName()[0], NULL };
00238
00239 auto pid = fork();
00240 if (pid == 0)
00241 {
00242 execvp("art", &args[0]);
00243 exit(1);
00244 }
00245 pid_out = pid;
00246
00247 TLOG_INFO("SharedMemoryEventManager") << "PID of new art process is " << pid << TLOG_ENDL;
00248 art_processes_.insert(pid);
00249 int status;
00250 waitpid(pid, &status, 0);
00251 TLOG_INFO("SharedMemoryEventManager") << "Removing PID " << pid << " from process list" << TLOG_ENDL;
00252 art_processes_.erase(pid);
00253 if (status == 0)
00254 {
00255 TLOG_INFO("SharedMemoryEventManager") << "art process " << pid << " exited normally, " << (restart_art_ ? "restarting" : "not restarting") << TLOG_ENDL;
00256 }
00257 else
00258 {
00259 TLOG_WARNING("SharedMemoryEventManager") << "art process " << pid << " exited with status code 0x" << std::hex << status << " (" << std::dec << status << "), " << (restart_art_ ? "restarting" : "not restarting") << TLOG_ENDL;
00260 }
00261 }
00262 }
00263
00264 void artdaq::SharedMemoryEventManager::StartArt()
00265 {
00266 restart_art_ = true;
00267 if (num_art_processes_ == 0) return;
00268 for (size_t ii = 0; ii < num_art_processes_; ++ii)
00269 {
00270 StartArtProcess(current_art_pset_);
00271 }
00272 }
00273
00274 pid_t artdaq::SharedMemoryEventManager::StartArtProcess(fhicl::ParameterSet pset)
00275 {
00276 static std::mutex start_art_mutex;
00277 TraceLock lk(start_art_mutex, 15, "StartArtLock");
00278 restart_art_ = true;
00279 auto initialCount = GetAttachedCount();
00280 auto startTime = std::chrono::steady_clock::now();
00281
00282 if (pset != current_art_pset_)
00283 {
00284 current_art_pset_ = pset;
00285 current_art_config_file_ = std::make_shared<art_config_file>(pset);
00286 }
00287 pid_t pid = -1;
00288 boost::thread thread([&] {RunArt(current_art_config_file_, pid); });
00289 thread.detach();
00290
00291
00292 while ((GetAttachedCount() - initialCount < 1 || pid <= 0)
00293 && TimeUtils::GetElapsedTime(startTime) < 5)
00294 {
00295 usleep(1000);
00296 }
00297 if (GetAttachedCount() - initialCount < 1 || pid <= 0)
00298 {
00299 TLOG_WARNING("SharedMemoryEventManager") << "art process has not started after 5s. Check art configuration!"
00300 << " (pid=" << pid << ", attachedCount=" << std::to_string(GetAttachedCount() - initialCount) << ")" << TLOG_ENDL;
00301 return 0;
00302 }
00303 else
00304 {
00305 TLOG_INFO("SharedMemoryEventManager") << std::setw(4) << std::fixed << "art initialization took "
00306 << TimeUtils::GetElapsedTime(startTime) << " seconds." << TLOG_ENDL;
00307
00308 return pid;
00309 }
00310
00311 }
00312
00313 void artdaq::SharedMemoryEventManager::ShutdownArtProcesses(std::set<pid_t> pids)
00314 {
00315 restart_art_ = false;
00316 current_art_config_file_ = nullptr;
00317 current_art_pset_ = fhicl::ParameterSet();
00318
00319 for (auto pid : pids)
00320 {
00321 if (kill(pid, 0) >= 0)
00322 {
00323 pids.erase(pid);
00324 }
00325 }
00326 if (pids.size() == 0)
00327 {
00328 TLOG_ARB(14, "SharedMemoryEventManager") << "All art processes already exited, nothing to do." << TLOG_ENDL;
00329 usleep(1000);
00330 return;
00331 }
00332
00333 TLOG_TRACE("SharedMemoryEventManager") << "Gently informing art processes that it is time to shut down" << TLOG_ENDL;
00334 for (auto pid : pids)
00335 {
00336 kill(pid, SIGQUIT);
00337 }
00338
00339 int graceful_wait_ms = 1000;
00340 int int_wait_ms = 100;
00341
00342 TLOG_TRACE("SharedMemoryEventManager") << "Waiting up to " << graceful_wait_ms << " ms for all art processes to exit gracefully" << TLOG_ENDL;
00343 for (int ii = 0; ii < graceful_wait_ms; ++ii)
00344 {
00345 usleep(1000);
00346
00347 for (auto pid : pids)
00348 {
00349 if (kill(pid, 0) < 0)
00350 {
00351 pids.erase(pid);
00352 }
00353 }
00354 if (pids.size() == 0)
00355 {
00356 TLOG_TRACE("SharedMemoryEventManager") << "All art processes exited after " << ii << " ms." << TLOG_ENDL;
00357 return;
00358 }
00359 }
00360
00361 TLOG_TRACE("SharedMemoryEventManager") << "Insisting that the art processes shut down" << TLOG_ENDL;
00362 for (auto pid : pids)
00363 {
00364 kill(pid, SIGINT);
00365 }
00366
00367 TLOG_TRACE("SharedMemoryEventManager") << "Waiting up to " << int_wait_ms << " ms for all art processes to exit" << TLOG_ENDL;
00368 for (int ii = graceful_wait_ms; ii < graceful_wait_ms + int_wait_ms; ++ii)
00369 {
00370 usleep(1000);
00371
00372 for (auto pid : pids)
00373 {
00374 if (kill(pid, 0) < 0)
00375 {
00376 pids.erase(pid);
00377 }
00378 }
00379
00380 if (pids.size() == 0)
00381 {
00382 TLOG_TRACE("SharedMemoryEventManager") << "All art processes exited after " << ii << " ms." << TLOG_ENDL;
00383 return;
00384 }
00385 }
00386
00387 TLOG_TRACE("SharedMemoryEventManager") << "Killing remaning art processes with extreme prejudice" << TLOG_ENDL;
00388 while (pids.size() > 0)
00389 {
00390 kill(*pids.begin(), SIGKILL);
00391 }
00392 }
00393
00394 void artdaq::SharedMemoryEventManager::ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun, int n_art_processes)
00395 {
00396 TLOG_DEBUG("SharedMemoryEventManager") << "ReconfigureArt BEGIN" << TLOG_ENDL;
00397 if (restart_art_)
00398 {
00399 endOfData();
00400 }
00401 for (size_t ii = 0; ii < broadcasts_.size(); ++ii)
00402 {
00403 broadcasts_.MarkBufferEmpty(ii, true);
00404 }
00405 if (newRun == 0) newRun = run_id_ + 1;
00406 current_art_pset_ = art_pset;
00407 current_art_config_file_ = std::make_shared<art_config_file>(art_pset);
00408
00409 if (n_art_processes != -1)
00410 {
00411 TLOG_INFO("SharedMemoryEventManager") << "Setting number of art processes to " << n_art_processes << TLOG_ENDL;
00412 num_art_processes_ = n_art_processes;
00413 }
00414 startRun(newRun);
00415 TLOG_DEBUG("SharedMemoryEventManager") << "ReconfigureArt END" << TLOG_ENDL;
00416 }
00417
00418 bool artdaq::SharedMemoryEventManager::endOfData()
00419 {
00420 init_fragment_.reset(nullptr);
00421 TLOG_TRACE("SharedMemoryEventManager") << "SharedMemoryEventManager::endOfData" << TLOG_ENDL;
00422 restart_art_ = false;
00423
00424 size_t initialStoreSize = GetIncompleteEventCount();
00425 TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Flushing " << initialStoreSize
00426 << " stale events from the SharedMemoryEventManager." << TLOG_ENDL;
00427 int counter = initialStoreSize;
00428 while (active_buffers_.size() > 0 && counter > 0)
00429 {
00430 complete_buffer_(*active_buffers_.begin());
00431 counter--;
00432 }
00433 TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Done flushing, there are now " << GetIncompleteEventCount()
00434 << " stale events in the SharedMemoryEventManager." << TLOG_ENDL;
00435
00436
00437 TLOG_TRACE("SharedMemoryEventManager") << "Waiting for " << std::to_string(ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_))) << " outstanding buffers..." << TLOG_ENDL;
00438 auto start = std::chrono::steady_clock::now();
00439 auto lastReadCount = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
00440
00441
00442 while (lastReadCount > 0 && TimeUtils::GetElapsedTime(start) < 1)
00443 {
00444 auto temp = ReadReadyCount() + (size() - WriteReadyCount(overwrite_mode_));
00445 if (temp != lastReadCount)
00446 {
00447 TLOG_TRACE("SharedMemoryEventManager") << "Waiting for " << std::to_string(temp) << " outstanding buffers..." << TLOG_ENDL;
00448 lastReadCount = temp;
00449 start = std::chrono::steady_clock::now();
00450 }
00451 if (lastReadCount > 0) usleep(1000);
00452 }
00453
00454 TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Broadcasting EndOfData Fragment" << TLOG_ENDL;
00455 FragmentPtr outFrag = std::move(Fragment::eodFrag(GetBufferCount()));
00456 bool success = broadcastFragment_(std::move(outFrag), outFrag);
00457 if (!success)
00458 {
00459 TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Clearing buffers to make room for EndOfData Fragment" << TLOG_ENDL;
00460 for (size_t ii = 0; ii < size(); ++ii)
00461 {
00462 broadcasts_.MarkBufferEmpty(ii, true);
00463 }
00464 broadcastFragment_(std::move(outFrag), outFrag);
00465 }
00466
00467 while (art_processes_.size() > 0)
00468 {
00469 TLOG_DEBUG("SharedMemoryEventManager") << "Waiting for all art processes to exit, there are " << std::to_string(art_processes_.size()) << " remaining." << TLOG_ENDL;
00470 ShutdownArtProcesses(art_processes_);
00471 }
00472 ResetAttachedCount();
00473
00474 TLOG_TRACE("SharedMemoryEventManager") << "endOfData: Clearing buffers" << TLOG_ENDL;
00475 for (size_t ii = 0; ii < size(); ++ii)
00476 {
00477 MarkBufferEmpty(ii, true);
00478 }
00479 released_incomplete_events_.clear();
00480
00481 TLOG_TRACE("SharedMemoryEventManager") << "endOfData END" << TLOG_ENDL;
00482 TLOG_INFO("SharedMemoryEventManager") << "EndOfData Complete. There were " << GetLastSeenBufferID() << " events processed in this run." << TLOG_ENDL;
00483 return true;
00484 }
00485
00486 void artdaq::SharedMemoryEventManager::startRun(run_id_t runID)
00487 {
00488 init_fragment_.reset(nullptr);
00489 StartArt();
00490 run_id_ = runID;
00491 subrun_id_ = 1;
00492 requests_.SendRoutingToken(queue_size_);
00493 TLOG_DEBUG("SharedMemoryEventManager") << "Starting run " << run_id_
00494 << ", max queue size = "
00495 << queue_size_
00496 << ", queue size = "
00497 << GetLockedBufferCount() << TLOG_ENDL;
00498 if (metricMan)
00499 {
00500 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
00501 metricMan->sendMetric("Run Number", runSubrun, "Run:Subrun", 1, MetricMode::LastPoint);
00502 }
00503 }
00504
00505 void artdaq::SharedMemoryEventManager::startSubrun()
00506 {
00507 ++subrun_id_;
00508 if (metricMan)
00509 {
00510 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
00511 metricMan->sendMetric("Run Number", runSubrun, "Run:Subrun", 1, MetricMode::LastPoint);
00512 }
00513 }
00514
00515 bool artdaq::SharedMemoryEventManager::endRun()
00516 {
00517 FragmentPtr endOfRunFrag(new
00518 Fragment(static_cast<size_t>
00519 (ceil(sizeof(my_rank) /
00520 static_cast<double>(sizeof(Fragment::value_type))))));
00521
00522 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
00523 *endOfRunFrag->dataBegin() = my_rank;
00524 broadcastFragment_(std::move(endOfRunFrag), endOfRunFrag);
00525
00526 return true;
00527 }
00528
00529 bool artdaq::SharedMemoryEventManager::endSubrun()
00530 {
00531 std::unique_ptr<artdaq::Fragment>
00532 endOfSubrunFrag(new
00533 Fragment(static_cast<size_t>
00534 (ceil(sizeof(my_rank) /
00535 static_cast<double>(sizeof(Fragment::value_type))))));
00536
00537 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
00538 *endOfSubrunFrag->dataBegin() = my_rank;
00539
00540 broadcastFragment_(std::move(endOfSubrunFrag), endOfSubrunFrag);
00541
00542 TLOG_INFO("SharedMemoryEventManager") << "Subrun " << subrun_id_ << " in run " << run_id_ << " has ended. There were " << subrun_event_count_ << " events in this subrun." << TLOG_ENDL;
00543 subrun_event_count_ = 0;
00544
00545 return true;
00546 }
00547
00548 void artdaq::SharedMemoryEventManager::sendMetrics()
00549 {
00550 if (metricMan)
00551 {
00552 metricMan->sendMetric("Incomplete Event Count", GetIncompleteEventCount(), "events", 1, MetricMode::LastPoint);
00553 metricMan->sendMetric("Pending Event Count", GetPendingEventCount(), "events", 1, MetricMode::LastPoint);
00554 }
00555 check_pending_buffers_();
00556 if (incomplete_event_report_interval_ms_ > 0 && GetLockedBufferCount())
00557 {
00558 if (TimeUtils::GetElapsedTimeMilliseconds(last_incomplete_event_report_time_) < static_cast<size_t>(incomplete_event_report_interval_ms_))
00559 return;
00560
00561 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
00562 std::ostringstream oss;
00563 oss << "Incomplete Events (" << num_fragments_per_event_ << "): ";
00564 for (auto& ev : active_buffers_)
00565 {
00566 auto hdr = getEventHeader_(ev);
00567 oss << hdr->sequence_id << " (" << GetFragmentCount(hdr->sequence_id) << "), ";
00568 }
00569 TLOG_DEBUG("SharedMemoryEventManager") << oss.str() << TLOG_ENDL;
00570 }
00571 }
00572
00573 bool artdaq::SharedMemoryEventManager::broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag)
00574 {
00575 auto buffer = broadcasts_.GetBufferForWriting(false);
00576 auto start_time = std::chrono::steady_clock::now();
00577 while (buffer == -1 && TimeUtils::GetElapsedTimeMilliseconds(start_time) < static_cast<size_t>(broadcast_timeout_ms_))
00578 {
00579 usleep(10000);
00580 buffer = broadcasts_.GetBufferForWriting(false);
00581 }
00582 if (buffer == -1)
00583 {
00584 TLOG_ERROR("SharedMemoryEventManager") << "Broadcast of fragment type " << frag->typeString() << " failed due to timeout waiting for buffer!" << TLOG_ENDL;
00585 outFrag.swap(frag);
00586 return false;
00587 }
00588
00589 auto hdr = reinterpret_cast<detail::RawEventHeader*>(broadcasts_.GetBufferStart(buffer));
00590 hdr->run_id = run_id_;
00591 hdr->subrun_id = subrun_id_;
00592 hdr->sequence_id = frag->sequenceID();
00593 hdr->is_complete = true;
00594 broadcasts_.IncrementWritePos(buffer, sizeof(detail::RawEventHeader));
00595
00596 TLOG_TRACE("SharedMemoryEventManager") << "broadcastFragment_ before Write calls" << TLOG_ENDL;
00597 broadcasts_.Write(buffer, frag->headerAddress(), frag->size() * sizeof(RawDataType));
00598
00599 broadcasts_.MarkBufferFull(buffer, -1);
00600 outFrag.swap(frag);
00601 return true;
00602 }
00603
00604 artdaq::detail::RawEventHeader* artdaq::SharedMemoryEventManager::getEventHeader_(int buffer)
00605 {
00606 return reinterpret_cast<detail::RawEventHeader*>(GetBufferStart(buffer));
00607 }
00608
00609 int artdaq::SharedMemoryEventManager::getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp)
00610 {
00611 check_pending_buffers_();
00612 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
00613 TLOG_ARB(14, "SharedMemoryEventManager") << "getBufferForSequenceID " << std::to_string(seqID) << " BEGIN" << TLOG_ENDL;
00614 auto buffers = GetBuffersOwnedByManager();
00615 for (auto& buf : buffers)
00616 {
00617 auto hdr = getEventHeader_(buf);
00618 if (hdr->sequence_id == seqID)
00619 {
00620 TLOG_ARB(14, "SharedMemoryEventManager") << "getBufferForSequenceID " << std::to_string(seqID) << " returning " << buf << TLOG_ENDL;
00621 return buf;
00622 }
00623 }
00624
00625 #if !ART_SUPPORTS_DUPLICATE_EVENTS
00626 if (released_incomplete_events_.count(seqID)) {
00627 TLOG_ERROR("SharedMemoryEventManager") << "Buffer has already been marked \"Incomplete\" and sent to art!" << TLOG_ENDL;
00628 return -2;
00629 }
00630 #endif
00631
00632 if (!create_new) return -1;
00633
00634 int new_buffer = GetBufferForWriting(false);
00635
00636 if (new_buffer == -1)
00637 {
00638 new_buffer = GetBufferForWriting(overwrite_mode_);
00639 }
00640
00641 if (new_buffer == -1) return -1;
00642 TraceLock(buffer_mutexes_[new_buffer], 34, "getBufferForSequenceID");
00643 auto hdr = getEventHeader_(new_buffer);
00644 hdr->is_complete = false;
00645 hdr->run_id = run_id_;
00646 hdr->subrun_id = subrun_id_;
00647 hdr->sequence_id = seqID;
00648 buffer_writes_pending_[new_buffer] = 0;
00649 IncrementWritePos(new_buffer, sizeof(detail::RawEventHeader));
00650
00651 active_buffers_.insert(new_buffer);
00652
00653 if (timestamp != Fragment::InvalidTimestamp)
00654 {
00655 requests_.AddRequest(seqID, timestamp);
00656 }
00657 requests_.SendRequest();
00658 TLOG_ARB(14, "SharedMemoryEventManager") << "getBufferForSequenceID " << std::to_string(seqID) << " returning newly initialized buffer " << new_buffer << TLOG_ENDL;
00659 return new_buffer;
00660 }
00661
00662 bool artdaq::SharedMemoryEventManager::hasFragments_(int buffer)
00663 {
00664 if (buffer == -1) return true;
00665 if (!CheckBuffer(buffer, BufferSemaphoreFlags::Writing))
00666 {
00667 return true;
00668 }
00669 ResetReadPos(buffer);
00670 IncrementReadPos(buffer, sizeof(detail::RawEventHeader));
00671 return MoreDataInBuffer(buffer);
00672 }
00673
00674 void artdaq::SharedMemoryEventManager::complete_buffer_(int buffer)
00675 {
00676 auto hdr = getEventHeader_(buffer);
00677 if (hdr->is_complete)
00678 {
00679 TLOG_DEBUG("SharedMemoryEventManager") << "complete_buffer_: This fragment completes event " << std::to_string(hdr->sequence_id) << "." << TLOG_ENDL;
00680
00681 requests_.RemoveRequest(hdr->sequence_id);
00682 requests_.SendRoutingToken(1);
00683 {
00684 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
00685 active_buffers_.erase(buffer);
00686 pending_buffers_.insert(buffer);
00687 }
00688 }
00689 check_pending_buffers_();
00690 }
00691
00692 bool artdaq::SharedMemoryEventManager::bufferComparator(int bufA, int bufB)
00693 {
00694 return getEventHeader_(bufA)->sequence_id < getEventHeader_(bufB)->sequence_id;
00695 }
00696
00697 void artdaq::SharedMemoryEventManager::check_pending_buffers_()
00698 {
00699 TLOG_TRACE("SharedMemoryEventManager") << "check_pending_buffers_ BEGIN" << TLOG_ENDL;
00700 {
00701 std::unique_lock<std::mutex> lk(sequence_id_mutex_);
00702 auto buffers = GetBuffersOwnedByManager();
00703 for (auto buf : buffers)
00704 {
00705 if (ResetBuffer(buf) && !pending_buffers_.count(buf))
00706 {
00707 auto hdr = getEventHeader_(buf);
00708 if (active_buffers_.count(buf))
00709 {
00710 TLOG_WARNING("SharedMemoryEventManager") << "Active event " << std::to_string(hdr->sequence_id) << " is stale. Scheduling release of incomplete event to art." << TLOG_ENDL;
00711 requests_.RemoveRequest(hdr->sequence_id);
00712 requests_.SendRoutingToken(1);
00713 active_buffers_.erase(buf);
00714 pending_buffers_.insert(buf);
00715 if (!released_incomplete_events_.count(hdr->sequence_id)) {
00716 released_incomplete_events_[hdr->sequence_id] = num_fragments_per_event_ - GetFragmentCountInBuffer(buf);
00717 }
00718 else {
00719 released_incomplete_events_[hdr->sequence_id] -= GetFragmentCountInBuffer(buf);
00720 }
00721 }
00722
00723 }
00724 }
00725
00726 Fragment::sequence_id_t lowestSeqId = Fragment::InvalidSequenceID;
00727
00728
00729 if (WriteReadyCount(false) != 0)
00730 {
00731 for (auto buf : active_buffers_)
00732 {
00733 auto hdr = getEventHeader_(buf);
00734 TLOG_TRACE("SharedMemoryEventManager") << "Buffer: " << buf << ", SeqID: " << std::to_string(hdr->sequence_id) << ", ACTIVE" << TLOG_ENDL;
00735 if (hdr->sequence_id < lowestSeqId)
00736 {
00737 lowestSeqId = hdr->sequence_id;
00738 }
00739 }
00740 TLOG_TRACE("SharedMemoryEventManager") << "Lowest SeqID held: " << std::to_string(lowestSeqId) << TLOG_ENDL;
00741 }
00742
00743 std::list<int> sorted_buffers(pending_buffers_.begin(), pending_buffers_.end());
00744 sorted_buffers.sort([this](int a, int b) {return bufferComparator(a, b); });
00745 for (auto buf : sorted_buffers)
00746 {
00747 auto hdr = getEventHeader_(buf);
00748 if (hdr->sequence_id > lowestSeqId) break;
00749 TLOG_DEBUG("SharedMemoryEventManager") << "Releasing event " << std::to_string(hdr->sequence_id) << " in buffer " << buf << " to art." << TLOG_ENDL;
00750 MarkBufferFull(buf);
00751 subrun_event_count_++;
00752 pending_buffers_.erase(buf);
00753 }
00754 }
00755
00756 TLOG_TRACE("SharedMemoryEventManager") << "check_pending_buffers_: Sending Metrics" << TLOG_ENDL;
00757 if (metricMan)
00758 {
00759 auto full = ReadReadyCount();
00760 auto empty = WriteReadyCount(overwrite_mode_);
00761 auto total = size();
00762 metricMan->sendMetric("Shared Memory Full Buffers", full, "buffers", 2, MetricMode::LastPoint);
00763 metricMan->sendMetric("Shared Memory Available Buffers", empty, "buffers", 2, MetricMode::LastPoint);
00764 metricMan->sendMetric("Shared Memory Full %", full * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
00765 metricMan->sendMetric("Shared Memory Available %", empty * 100 / static_cast<double>(total), "%", 2, MetricMode::LastPoint);
00766 }
00767 TLOG_TRACE("SharedMemoryEventManager") << "check_pending_buffers_ END" << TLOG_ENDL;
00768 }
00769
00770 void artdaq::SharedMemoryEventManager::send_init_frag_()
00771 {
00772 if (init_fragment_ != nullptr)
00773 {
00774 TLOG_TRACE("SharedMemoryEventManager") << "Sending init Fragment to art..." << TLOG_ENDL;
00775
00776 #if 0
00777 std::string fileName = "receiveInitMessage_" + std::to_string(my_rank) + ".bin";
00778 std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
00779 ostream.write(reinterpret_cast<char*>(init_fragment_->dataBeginBytes()), init_fragment_->dataSizeBytes());
00780 ostream.close();
00781 #endif
00782
00783 broadcastFragment_(std::move(init_fragment_), init_fragment_);
00784 TLOG_TRACE("SharedMemoryEventManager") << "Init Fragment sent" << TLOG_ENDL;
00785 }
00786 else if (send_init_fragments_)
00787 {
00788 TLOG_WARNING("SharedMemoryEventManager") << "Cannot send init fragment because I haven't yet received one!" << TLOG_ENDL;
00789 }
00790 }
00791
00792 void artdaq::SharedMemoryEventManager::SetInitFragment(FragmentPtr frag)
00793 {
00794 if (!init_fragment_ || init_fragment_ == nullptr)
00795 {
00796 init_fragment_.swap(frag);
00797 send_init_frag_();
00798 }
00799 }