$treeview $search $mathjax $extrastylesheet
artdaq
v3_04_00
$projectbrief
|
$projectbrief
|
$searchbox |
00001 #ifndef ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH 00002 #define ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH 00003 00004 #include "artdaq/DAQdata/Globals.hh" // Before trace.h gets included in ConcurrentQueue (from GlobalQueue) 00005 00006 #include <sys/stat.h> 00007 #include <deque> 00008 #include <fstream> 00009 #include <iomanip> 00010 #include <set> 00011 #include "artdaq-core/Core/SharedMemoryManager.hh" 00012 #include "artdaq-core/Data/RawEvent.hh" 00013 #include "artdaq/Application/StatisticsHelper.hh" 00014 #include "artdaq/DAQrate/RequestSender.hh" 00015 #include "artdaq/DAQrate/detail/ArtConfig.hh" 00016 #include "fhiclcpp/fwd.h" 00017 #define ART_SUPPORTS_DUPLICATE_EVENTS 0 00018 00019 namespace artdaq { 00020 00024 class art_config_file 00025 { 00026 public: 00031 art_config_file(fhicl::ParameterSet ps /*, uint32_t shm_key, uint32_t broadcast_key*/) 00032 : dir_name_("/tmp/partition_" + std::to_string(GetPartitionNumber())) 00033 , file_name_(dir_name_ + "/artConfig_" + std::to_string(my_rank) + "_" + std::to_string(artdaq::TimeUtils::gettimeofday_us()) + ".fcl") 00034 { 00035 mkdir(dir_name_.c_str(), 0777); // Allowed to fail if directory already exists 00036 00037 std::ofstream of(file_name_, std::ofstream::trunc); 00038 if (of.fail()) 00039 { 00040 // Probably a permissions error... 00041 dir_name_ = "/tmp/partition_" + std::to_string(GetPartitionNumber()) + "_" + std::to_string(getuid()); 00042 mkdir(dir_name_.c_str(), 0777); // Allowed to fail if directory already exists 00043 file_name_ = dir_name_ + "/artConfig_" + std::to_string(my_rank) + "_" + std::to_string(artdaq::TimeUtils::gettimeofday_us()) + ".fcl"; 00044 00045 of.open(file_name_, std::ofstream::trunc); 00046 if (of.fail()) 00047 { 00048 TLOG(TLVL_ERROR) << "Failed to open configuration file after two attemps! ABORTING!"; 00049 exit(46); 00050 } 00051 } 00052 of << ps.to_string(); 00053 00054 //if (ps.has_key("services.NetMonTransportServiceInterface")) 00055 //{ 00056 // of << " services.NetMonTransportServiceInterface.shared_memory_key: 0x" << std::hex << shm_key; 00057 // of << " services.NetMonTransportServiceInterface.broadcast_shared_memory_key: 0x" << std::hex << broadcast_key; 00058 // of << " services.NetMonTransportServiceInterface.rank: " << std::dec << my_rank; 00059 //} 00060 if (!ps.has_key("services.message")) 00061 { 00062 of << " services.message: { " << generateMessageFacilityConfiguration("art") << "} "; 00063 } 00064 //of << " source.shared_memory_key: 0x" << std::hex << shm_key; 00065 //of << " source.broadcast_shared_memory_key: 0x" << std::hex << broadcast_key; 00066 //of << " source.rank: " << std::dec << my_rank; 00067 of.close(); 00068 } 00069 ~art_config_file() 00070 { 00071 remove(file_name_.c_str()); 00072 rmdir(dir_name_.c_str()); // Will only delete directory if no config files are left over 00073 } 00078 std::string getFileName() const { return file_name_; } 00079 00080 private: 00081 std::string dir_name_; 00082 std::string file_name_; 00083 }; 00084 00088 class SharedMemoryEventManager : public SharedMemoryManager 00089 { 00090 public: 00091 typedef RawEvent::run_id_t run_id_t; 00092 typedef RawEvent::subrun_id_t subrun_id_t; 00093 typedef Fragment::sequence_id_t sequence_id_t; 00094 typedef std::map<sequence_id_t, RawEvent_ptr> EventMap; 00095 00099 struct Config 00100 { 00103 fhicl::Atom<size_t> max_event_size_bytes{fhicl::Name{"max_event_size_bytes"}, fhicl::Comment{"Maximum event size (all Fragments), in bytes"}}; 00105 fhicl::Atom<size_t> stale_buffer_timeout_usec{fhicl::Name{"stale_buffer_timeout_usec"}, fhicl::Comment{"Maximum amount of time elapsed before a buffer is marked as abandoned. Time is reset each time an operation is performed on the buffer."}, 5000000}; 00107 fhicl::Atom<bool> overwrite_mode{fhicl::Name{"overwrite_mode"}, fhicl::Comment{"Whether buffers are allowed to be overwritten when safe (state == Full or Reading)"}, false}; 00109 fhicl::Atom<bool> restart_crashed_art_processes{fhicl::Name{"restart_crashed_art_processes"}, fhicl::Comment{"Whether to automatically restart art processes that fail for any reason"}, true}; 00111 fhicl::Atom<uint32_t> shared_memory_key{fhicl::Name{"shared_memory_key"}, fhicl::Comment{"Key to use for shared memory access"}, 0xBEE70000 + getpid()}; 00113 fhicl::Atom<size_t> buffer_count{fhicl::Name{"buffer_count"}, fhicl::Comment{"Number of events in the Shared Memory (incomplete + pending art)"}}; 00115 fhicl::Atom<size_t> max_subrun_lookup_table_size{fhicl::Name{"max_subrun_lookup_table_size"}, fhicl::Comment{"Maximum number of entries in the subrun rollover history"}, 100}; 00118 fhicl::Atom<size_t> max_fragment_size_bytes{fhicl::Name{"max_fragment_size_bytes"}, fhicl::Comment{" Maximum Fragment size, in bytes"}}; 00120 fhicl::Atom<size_t> event_queue_wait_time{fhicl::Name{"event_queue_wait_time"}, fhicl::Comment{"Amount of time (in seconds) an event can exist in shared memory before being released to art. Used as input to default parameter of \"stale_buffer_timeout_usec\"."}, 5}; 00122 fhicl::Atom<bool> broadcast_mode{fhicl::Name{"broadcast_mode"}, fhicl::Comment{"When true, buffers are not marked Empty when read, but return to Full state. Buffers are overwritten in order received."}, false}; 00124 fhicl::Atom<size_t> art_analyzer_count{fhicl::Name{"art_analyzer_count"}, fhicl::Comment{"Number of art procceses to start"}, 1}; 00126 fhicl::Atom<size_t> expected_fragments_per_event{fhicl::Name{"expected_fragments_per_event"}, fhicl::Comment{"Number of Fragments to expect per event"}}; 00128 fhicl::Atom<int> maximum_oversize_fragment_count{fhicl::Name{"maximum_oversize_fragment_count"}, fhicl::Comment{"Maximum number of over-size Fragments to drop before throwing an exception. Default is 1, which means to throw an exception if any over-size Fragments are dropped. Set to 0 to disable."}, 1}; 00130 fhicl::Atom<bool> update_run_ids_on_new_fragment{fhicl::Name{"update_run_ids_on_new_fragment"}, fhicl::Comment{"Whether the run and subrun ID of an event should be updated whenever a Fragment is added."}, true}; 00132 fhicl::Atom<bool> use_sequence_id_for_event_number{fhicl::Name{"use_sequence_id_for_event_number"}, fhicl::Comment{"Whether to use the artdaq Sequence ID (true) or the Timestamp (false) for art Event numbers"}, true}; 00134 fhicl::Atom<size_t> max_subrun_event_map_length{fhicl::Name{"max_subrun_event_map_length"}, fhicl::Comment{"The maximum number of entries to store in the sequence ID-SubRun ID lookup table"}, 100}; 00136 fhicl::Atom<bool> send_init_fragments{fhicl::Name{"send_init_fragments"}, fhicl::Comment{"Whether Init Fragments are expected to be sent to art. If true, a Warning message is printed when an Init Fragment is requested but none are available."}, true}; 00138 fhicl::Atom<int> incomplete_event_report_interval_ms{fhicl::Name{"incomplete_event_report_interval_ms"}, fhicl::Comment{"Interval at which an incomplete event report should be written"}, -1}; 00141 fhicl::Atom<int> fragment_broadcast_timeout_ms{fhicl::Name{"fragment_broadcast_timeout_ms"}, fhicl::Comment{"Amount of time broadcast fragments should live in the broadcast shared memory segment"}, 3000}; 00143 fhicl::Atom<double> minimum_art_lifetime_s{fhicl::Name{"minimum_art_lifetime_s"}, fhicl::Comment{"Amount of time that an art process should run to not be considered \"DOA\""}, 2.0}; 00146 fhicl::Atom<size_t> expected_art_event_processing_time_us{fhicl::Name{"expected_art_event_processing_time_us"}, fhicl::Comment{"During shutdown, SMEM will wait for this amount of time while it is checking that the art threads are done reading buffers."}, 100000}; 00148 fhicl::Atom<uint32_t> broadcast_shared_memory_key{fhicl::Name{"broadcast_shared_memory_key"}, fhicl::Comment{""}, 0xCEE70000 + getpid()}; 00150 fhicl::Atom<size_t> broadcast_buffer_count{fhicl::Name{"broadcast_buffer_count"}, fhicl::Comment{"Buffers in the broadcast shared memory segment"}, 10}; 00152 fhicl::Atom<size_t> broadcast_buffer_size{fhicl::Name{"broadcast_buffer_size"}, fhicl::Comment{"Size of the buffers in the broadcast shared memory segment"}, 0x100000}; 00154 fhicl::Atom<bool> use_art{fhicl::Name{"use_art"}, fhicl::Comment{"Whether to start and manage art threads (Sets art_analyzer count to 0 and overwrite_mode to true when false)"}, true}; 00156 fhicl::Atom<bool> manual_art{fhicl::Name{"manual_art"}, fhicl::Comment{"Prints the startup command line for the art process so that the user may (for example) run it in GDB or valgrind"}, false}; 00157 00158 fhicl::TableFragment<artdaq::RequestSender::Config> requestSenderConfig; 00159 }; 00160 using Parameters = fhicl::WrappedTable<Config>; 00161 00167 SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset); 00171 virtual ~SharedMemoryEventManager(); 00172 00173 private: 00180 bool AddFragment(detail::RawFragmentHeader frag, void* dataPtr); 00181 00182 public: 00190 bool AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag); 00191 00198 RawDataType* WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable = false); 00199 00204 void DoneWritingFragment(detail::RawFragmentHeader frag); 00205 00210 size_t GetIncompleteEventCount() { return active_buffers_.size(); } 00211 00216 size_t GetPendingEventCount() { return pending_buffers_.size(); } 00217 00222 size_t GetLockedBufferCount() { return GetBuffersOwnedByManager().size(); } 00223 00228 size_t GetArtEventCount() { return run_event_count_; } 00229 00236 size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type = Fragment::InvalidFragmentType); 00237 00244 size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type = Fragment::InvalidFragmentType); 00245 00249 void RunArt(std::shared_ptr<art_config_file> config_file, std::shared_ptr<std::atomic<pid_t>> pid_out); 00253 void StartArt(); 00254 00260 pid_t StartArtProcess(fhicl::ParameterSet pset); 00261 00266 void ShutdownArtProcesses(std::set<pid_t>& pids); 00267 00274 void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun = 0, int n_art_processes = -1); 00275 00285 bool endOfData(); 00286 00291 void startRun(run_id_t runID); 00292 00297 run_id_t runID() const { return run_id_; } 00298 00303 bool endRun(); 00304 00310 void rolloverSubrun(sequence_id_t boundary, subrun_id_t subrun); 00311 00315 void rolloverSubrun(); 00316 00320 void sendMetrics(); 00321 00326 void setRequestMode(detail::RequestMessageMode mode) 00327 { 00328 if (requests_) requests_->SetRequestMode(mode); 00329 } 00330 00335 void setOverwrite(bool overwrite) { overwrite_mode_ = overwrite; } 00336 00340 void SetInitFragment(FragmentPtr frag); 00341 00346 uint32_t GetBroadcastKey() { return broadcasts_.GetKey(); } 00347 00352 RawDataType* GetDroppedDataAddress(Fragment::fragment_id_t frag) { return dropped_data_[frag]->dataBegin(); } 00353 00363 void UpdateArtConfiguration(fhicl::ParameterSet art_pset); 00364 00368 void CheckPendingBuffers(); 00369 00374 subrun_id_t GetSubrunForSequenceID(Fragment::sequence_id_t seqID); 00375 00376 subrun_id_t GetCurrentSubrun() { return GetSubrunForSequenceID(Fragment::InvalidSequenceID); } 00377 00378 private: 00379 size_t get_art_process_count_() 00380 { 00381 std::unique_lock<std::mutex> lk(art_process_mutex_); 00382 return art_processes_.size(); 00383 } 00384 00385 private: 00386 size_t num_art_processes_; 00387 size_t const num_fragments_per_event_; 00388 size_t const queue_size_; 00389 run_id_t run_id_; 00390 00391 std::map<sequence_id_t, subrun_id_t> subrun_event_map_; 00392 size_t max_subrun_event_map_length_; 00393 static std::mutex subrun_event_map_mutex_; 00394 00395 std::set<int> active_buffers_; 00396 std::set<int> pending_buffers_; 00397 std::unordered_map<Fragment::sequence_id_t, size_t> released_incomplete_events_; 00398 00399 bool update_run_ids_; 00400 bool use_sequence_id_for_event_number_; 00401 bool overwrite_mode_; 00402 bool send_init_fragments_; 00403 bool running_; 00404 00405 std::unordered_map<int, std::atomic<int>> buffer_writes_pending_; 00406 std::unordered_map<int, std::mutex> buffer_mutexes_; 00407 static std::mutex sequence_id_mutex_; 00408 00409 int incomplete_event_report_interval_ms_; 00410 std::chrono::steady_clock::time_point last_incomplete_event_report_time_; 00411 std::chrono::steady_clock::time_point last_shmem_buffer_metric_update_; 00412 std::chrono::steady_clock::time_point last_backpressure_report_time_; 00413 std::chrono::steady_clock::time_point last_fragment_header_write_time_; 00414 00415 00416 struct MetricData 00417 { 00418 MetricData() 00419 : event_count(0), event_size(0) {} 00420 size_t event_count; 00421 size_t event_size; 00422 }; 00423 MetricData metric_data_; 00424 00425 int broadcast_timeout_ms_; 00426 00427 std::atomic<int> run_event_count_; 00428 std::atomic<int> run_incomplete_event_count_; 00429 std::atomic<int> subrun_event_count_; 00430 std::atomic<int> subrun_incomplete_event_count_; 00431 std::atomic<int> oversize_fragment_count_; 00432 int maximum_oversize_fragment_count_; 00433 00434 mutable std::mutex art_process_mutex_; 00435 std::set<pid_t> art_processes_; 00436 std::atomic<bool> restart_art_; 00437 bool always_restart_art_; 00438 std::atomic<bool> manual_art_; 00439 fhicl::ParameterSet current_art_pset_; 00440 std::shared_ptr<art_config_file> current_art_config_file_; 00441 double minimum_art_lifetime_s_; 00442 size_t art_event_processing_time_us_; 00443 00444 std::unique_ptr<RequestSender> requests_; 00445 fhicl::ParameterSet data_pset_; 00446 00447 FragmentPtr init_fragment_; 00448 std::unordered_map<Fragment::fragment_id_t, FragmentPtr> dropped_data_; 00449 00450 bool broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag); 00451 00452 detail::RawEventHeader* getEventHeader_(int buffer); 00453 00454 int getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp = Fragment::InvalidTimestamp); 00455 bool hasFragments_(int buffer); 00456 void complete_buffer_(int buffer); 00457 bool bufferComparator(int bufA, int bufB); 00458 void check_pending_buffers_(std::unique_lock<std::mutex> const& lock); 00459 00460 void send_init_frag_(); 00461 SharedMemoryManager broadcasts_; 00462 }; 00463 } // namespace artdaq 00464 00465 #endif //ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH