00001 #ifndef ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH
00002 #define ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH
00003
00004 #include "artdaq/DAQdata/Globals.hh"
00005 #include "artdaq-core/Core/SharedMemoryManager.hh"
00006 #include "artdaq-core/Data/RawEvent.hh"
00007 #include "artdaq/DAQrate/RequestSender.hh"
00008 #include <set>
00009 #include <deque>
00010 #include <fstream>
00011 #include <iomanip>
00012 #include <sys/stat.h>
00013 #include "fhiclcpp/fwd.h"
00014 #include "artdaq/Application/StatisticsHelper.hh"
00015 #include "artdaq/DAQrate/detail/ArtConfig.hh"
00016 #define ART_SUPPORTS_DUPLICATE_EVENTS 0
00017
00018 namespace artdaq {
00019
00023 class art_config_file
00024 {
00025 public:
00030 art_config_file(fhicl::ParameterSet ps)
00031 : dir_name_("/tmp/partition_" + std::to_string(GetPartitionNumber()))
00032 , file_name_(dir_name_ + "/artConfig_" + std::to_string(my_rank) + "_" + std::to_string(artdaq::TimeUtils::gettimeofday_us()) + ".fcl")
00033 {
00034 mkdir(dir_name_.c_str(), 0777);
00035
00036 std::ofstream of(file_name_, std::ofstream::trunc);
00037 if (of.fail()) {
00038
00039 dir_name_ = "/tmp/partition_" + std::to_string(GetPartitionNumber()) + "_" + std::to_string(getuid());
00040 mkdir(dir_name_.c_str(), 0777);
00041 file_name_ = dir_name_ + "/artConfig_" + std::to_string(my_rank) + "_" + std::to_string(artdaq::TimeUtils::gettimeofday_us()) + ".fcl";
00042
00043 of.open(file_name_, std::ofstream::trunc);
00044 if (of.fail())
00045 {
00046 TLOG(TLVL_ERROR) << "Failed to open configuration file after two attemps! ABORTING!";
00047 exit(46);
00048 }
00049 }
00050 of << ps.to_string();
00051
00052
00053
00054
00055
00056
00057
00058 if (!ps.has_key("services.message"))
00059 {
00060 of << " services.message: { " << generateMessageFacilityConfiguration("art") << "} ";
00061 }
00062
00063
00064
00065 of.close();
00066 }
00067 ~art_config_file()
00068 {
00069 remove(file_name_.c_str());
00070 rmdir(dir_name_.c_str());
00071 }
00076 std::string getFileName() const { return file_name_; }
00077 private:
00078 std::string dir_name_;
00079 std::string file_name_;
00080 };
00081
00085 class SharedMemoryEventManager : public SharedMemoryManager
00086 {
00087 public:
00088 typedef RawEvent::run_id_t run_id_t;
00089 typedef RawEvent::subrun_id_t subrun_id_t;
00090 typedef Fragment::sequence_id_t sequence_id_t;
00091 typedef std::map<sequence_id_t, RawEvent_ptr> EventMap;
00092
00096 struct Config
00097 {
00100 fhicl::Atom<size_t> max_event_size_bytes{ fhicl::Name{ "max_event_size_bytes"}, fhicl::Comment{"Maximum event size (all Fragments), in bytes"} };
00102 fhicl::Atom<size_t> stale_buffer_timeout_usec{ fhicl::Name{ "stale_buffer_timeout_usec"}, fhicl::Comment{"Maximum amount of time elapsed before a buffer is marked as abandoned. Time is reset each time an operation is performed on the buffer."}, 5000000 };
00104 fhicl::Atom<bool> overwrite_mode{ fhicl::Name{ "overwrite_mode"}, fhicl::Comment{"Whether buffers are allowed to be overwritten when safe (state == Full or Reading)"}, false };
00106 fhicl::Atom<bool> restart_crashed_art_processes{ fhicl::Name{"restart_crashed_art_processes"}, fhicl::Comment{"Whether to automatically restart art processes that fail for any reason"}, true };
00108 fhicl::Atom<uint32_t> shared_memory_key{ fhicl::Name{ "shared_memory_key"}, fhicl::Comment{"Key to use for shared memory access"}, 0xBEE70000 + getpid() };
00110 fhicl::Atom<size_t> buffer_count{ fhicl::Name{ "buffer_count"}, fhicl::Comment{"Number of events in the Shared Memory (incomplete + pending art)"} };
00113 fhicl::Atom<size_t> max_fragment_size_bytes{ fhicl::Name{ "max_fragment_size_bytes"}, fhicl::Comment{" Maximum Fragment size, in bytes"} };
00115 fhicl::Atom<size_t> event_queue_wait_time{ fhicl::Name{ "event_queue_wait_time"}, fhicl::Comment{"Amount of time (in seconds) an event can exist in shared memory before being released to art. Used as input to default parameter of \"stale_buffer_timeout_usec\"."}, 5 };
00117 fhicl::Atom<bool> broadcast_mode{ fhicl::Name{ "broadcast_mode"}, fhicl::Comment{"When true, buffers are not marked Empty when read, but return to Full state. Buffers are overwritten in order received."}, false };
00119 fhicl::Atom<size_t> art_analyzer_count{ fhicl::Name{ "art_analyzer_count"}, fhicl::Comment{"Number of art procceses to start"}, 1 };
00121 fhicl::Atom<size_t> expected_fragments_per_event{ fhicl::Name{ "expected_fragments_per_event"}, fhicl::Comment{"Number of Fragments to expect per event"} };
00123 fhicl::Atom<int> maximum_oversize_fragment_count{ fhicl::Name{"maximum_oversize_fragment_count"}, fhicl::Comment{"Maximum number of over-size Fragments to drop before throwing an exception. Default is 1, which means to throw an exception if any over-size Fragments are dropped. Set to 0 to disable."},1 };
00125 fhicl::Atom<bool> update_run_ids_on_new_fragment{ fhicl::Name{ "update_run_ids_on_new_fragment"}, fhicl::Comment{"Whether the run and subrun ID of an event should be updated whenever a Fragment is added."}, true };
00127 fhicl::Atom<bool> use_sequence_id_for_event_number{ fhicl::Name{"use_sequence_id_for_event_number"}, fhicl::Comment{"Whether to use the artdaq Sequence ID (true) or the Timestamp (false) for art Event numbers"}, true };
00129 fhicl::Atom<bool> send_init_fragments{ fhicl::Name{ "send_init_fragments"}, fhicl::Comment{"Whether Init Fragments are expected to be sent to art. If true, a Warning message is printed when an Init Fragment is requested but none are available."}, true };
00131 fhicl::Atom<int> incomplete_event_report_interval_ms{ fhicl::Name{ "incomplete_event_report_interval_ms"}, fhicl::Comment{"Interval at which an incomplete event report should be written"}, -1 };
00134 fhicl::Atom<int> fragment_broadcast_timeout_ms{ fhicl::Name{ "fragment_broadcast_timeout_ms"}, fhicl::Comment{"Amount of time broadcast fragments should live in the broadcast shared memory segment"}, 3000 };
00136 fhicl::Atom<double> minimum_art_lifetime_s{ fhicl::Name{ "minimum_art_lifetime_s"}, fhicl::Comment{"Amount of time that an art process should run to not be considered \"DOA\""}, 2.0 };
00139 fhicl::Atom<size_t> expected_art_event_processing_time_us{ fhicl::Name{ "expected_art_event_processing_time_us"}, fhicl::Comment{"During shutdown, SMEM will wait for this amount of time while it is checking that the art threads are done reading buffers."}, 100000 };
00141 fhicl::Atom<uint32_t> broadcast_shared_memory_key{ fhicl::Name{ "broadcast_shared_memory_key"}, fhicl::Comment{""}, 0xCEE70000 + getpid() };
00143 fhicl::Atom<size_t> broadcast_buffer_count{ fhicl::Name{ "broadcast_buffer_count"}, fhicl::Comment{"Buffers in the broadcast shared memory segment"}, 10 };
00145 fhicl::Atom<size_t> broadcast_buffer_size{ fhicl::Name{ "broadcast_buffer_size"}, fhicl::Comment{"Size of the buffers in the broadcast shared memory segment"}, 0x100000 };
00147 fhicl::Atom<bool> use_art{ fhicl::Name{ "use_art"}, fhicl::Comment{"Whether to start and manage art threads (Sets art_analyzer count to 0 and overwrite_mode to true when false)"}, true };
00149 fhicl::Atom<bool> manual_art{ fhicl::Name{"manual_art"}, fhicl::Comment{"Prints the startup command line for the art process so that the user may (for example) run it in GDB or valgrind"}, false };
00150
00151 fhicl::TableFragment<artdaq::RequestSender::Config> requestSenderConfig;
00152 };
00153 using Parameters = fhicl::WrappedTable<Config>;
00154
00160 SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset);
00164 virtual ~SharedMemoryEventManager();
00165
00166 private:
00173 bool AddFragment(detail::RawFragmentHeader frag, void* dataPtr);
00174
00175 public:
00183 bool AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag);
00184
00191 RawDataType* WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable = false);
00192
00197 void DoneWritingFragment(detail::RawFragmentHeader frag);
00198
00203 size_t GetIncompleteEventCount() { return active_buffers_.size(); }
00204
00209 size_t GetPendingEventCount() { return pending_buffers_.size(); }
00210
00215 size_t GetLockedBufferCount() { return GetBuffersOwnedByManager().size(); }
00216
00221 size_t GetArtEventCount() { return subrun_event_count_; }
00222
00229 size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type = Fragment::InvalidFragmentType);
00230
00237 size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type = Fragment::InvalidFragmentType);
00238
00242 void RunArt(std::shared_ptr<art_config_file> config_file, std::shared_ptr<std::atomic<pid_t>> pid_out);
00246 void StartArt();
00247
00253 pid_t StartArtProcess(fhicl::ParameterSet pset);
00254
00259 void ShutdownArtProcesses(std::set<pid_t>& pids);
00260
00267 void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun = 0, int n_art_processes = -1);
00268
00278 bool endOfData();
00279
00284 void startRun(run_id_t runID);
00285
00289 void startSubrun();
00290
00295 run_id_t runID() const { return run_id_; }
00296
00301 subrun_id_t subrunID() const { return subrun_id_; }
00302
00307 bool endRun();
00308
00313 bool endSubrun();
00314
00319 void rolloverSubrun(sequence_id_t boundary);
00320
00324 void sendMetrics();
00325
00330 void setRequestMode(detail::RequestMessageMode mode) { if (requests_) requests_->SetRequestMode(mode); }
00331
00336 void setOverwrite(bool overwrite) { overwrite_mode_ = overwrite; }
00337
00341 void SetInitFragment(FragmentPtr frag);
00342
00347 uint32_t GetBroadcastKey() { return broadcasts_.GetKey(); }
00348
00353 RawDataType* GetDroppedDataAddress(Fragment::fragment_id_t frag) { return dropped_data_[frag]->dataBegin(); }
00354
00364 void UpdateArtConfiguration(fhicl::ParameterSet art_pset);
00365
00369 void CheckPendingBuffers();
00370
00371 private:
00372 size_t get_art_process_count_()
00373 {
00374 std::unique_lock<std::mutex> lk(art_process_mutex_);
00375 return art_processes_.size();
00376 }
00377
00378 private:
00379
00380 size_t num_art_processes_;
00381 size_t const num_fragments_per_event_;
00382 size_t const queue_size_;
00383 run_id_t run_id_;
00384 subrun_id_t subrun_id_;
00385 sequence_id_t subrun_rollover_event_;
00386 sequence_id_t last_released_event_;
00387
00388 std::set<int> active_buffers_;
00389 std::set<int> pending_buffers_;
00390 std::unordered_map<Fragment::sequence_id_t, size_t> released_incomplete_events_;
00391
00392 bool update_run_ids_;
00393 bool use_sequence_id_for_event_number_;
00394 bool overwrite_mode_;
00395 bool send_init_fragments_;
00396 bool running_;
00397
00398 std::unordered_map<int, std::atomic<int>> buffer_writes_pending_;
00399 std::unordered_map<int, std::mutex> buffer_mutexes_;
00400 static std::mutex sequence_id_mutex_;
00401
00402 int incomplete_event_report_interval_ms_;
00403 std::chrono::steady_clock::time_point last_incomplete_event_report_time_;
00404 std::chrono::steady_clock::time_point last_shmem_buffer_metric_update_;
00405
00406 struct MetricData {
00407 MetricData() : event_count(0), event_size(0) {}
00408 size_t event_count;
00409 size_t event_size;
00410 };
00411 MetricData metric_data_;
00412
00413 int broadcast_timeout_ms_;
00414
00415 std::atomic<int> run_event_count_;
00416 std::atomic<int> run_incomplete_event_count_;
00417 std::atomic<int> subrun_event_count_;
00418 std::atomic<int> subrun_incomplete_event_count_;
00419 std::atomic<int> oversize_fragment_count_;
00420 int maximum_oversize_fragment_count_;
00421
00422 mutable std::mutex art_process_mutex_;
00423 std::set<pid_t> art_processes_;
00424 std::atomic<bool> restart_art_;
00425 bool always_restart_art_;
00426 std::atomic<bool> manual_art_;
00427 fhicl::ParameterSet current_art_pset_;
00428 std::shared_ptr<art_config_file> current_art_config_file_;
00429 double minimum_art_lifetime_s_;
00430 size_t art_event_processing_time_us_;
00431
00432 std::unique_ptr<RequestSender> requests_;
00433 fhicl::ParameterSet data_pset_;
00434
00435 FragmentPtr init_fragment_;
00436 std::unordered_map<Fragment::fragment_id_t, FragmentPtr> dropped_data_;
00437
00438 bool broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag);
00439
00440 detail::RawEventHeader* getEventHeader_(int buffer);
00441
00442 int getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp = Fragment::InvalidTimestamp);
00443 bool hasFragments_(int buffer);
00444 void complete_buffer_(int buffer);
00445 bool bufferComparator(int bufA, int bufB);
00446 void check_pending_buffers_(std::unique_lock<std::mutex> const& lock);
00447
00448 void send_init_frag_();
00449 SharedMemoryManager broadcasts_;
00450 };
00451 }
00452
00453 #endif //ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH