artdaq  v3_02_00
SharedMemoryEventManager.hh
1 #ifndef ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH
2 #define ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH
3 
4 #include "artdaq/DAQdata/Globals.hh" // Before trace.h gets included in ConcurrentQueue (from GlobalQueue)
5 #include "artdaq-core/Core/SharedMemoryManager.hh"
6 #include "artdaq-core/Data/RawEvent.hh"
7 #include "artdaq/DAQrate/RequestSender.hh"
8 #include <set>
9 #include <deque>
10 #include <fstream>
11 #include <iomanip>
12 #include <sys/stat.h>
13 #include "fhiclcpp/fwd.h"
14 #include "artdaq/Application/StatisticsHelper.hh"
15 #include "artdaq/DAQrate/detail/ArtConfig.hh"
16 #define ART_SUPPORTS_DUPLICATE_EVENTS 0
17 
18 namespace artdaq {
19 
24  {
25  public:
30  art_config_file(fhicl::ParameterSet ps/*, uint32_t shm_key, uint32_t broadcast_key*/)
31  : dir_name_("/tmp/partition_" + std::to_string(Globals::GetPartitionNumber()))
32  , file_name_(dir_name_ + "/artConfig_" + std::to_string(my_rank) + "_" + std::to_string(artdaq::TimeUtils::gettimeofday_us()) + ".fcl")
33  {
34  mkdir(dir_name_.c_str(), S_IRWXU); // Allowed to fail if directory already exists
35 
36  std::ofstream of(file_name_, std::ofstream::trunc);
37  of << ps.to_string();
38 
39  //if (ps.has_key("services.NetMonTransportServiceInterface"))
40  //{
41  // of << " services.NetMonTransportServiceInterface.shared_memory_key: 0x" << std::hex << shm_key;
42  // of << " services.NetMonTransportServiceInterface.broadcast_shared_memory_key: 0x" << std::hex << broadcast_key;
43  // of << " services.NetMonTransportServiceInterface.rank: " << std::dec << my_rank;
44  //}
45  if (!ps.has_key("services.message"))
46  {
47  of << " services.message: { " << generateMessageFacilityConfiguration("art") << "} ";
48  }
49  //of << " source.shared_memory_key: 0x" << std::hex << shm_key;
50  //of << " source.broadcast_shared_memory_key: 0x" << std::hex << broadcast_key;
51  //of << " source.rank: " << std::dec << my_rank;
52  of.close();
53  }
55  {
56  remove(file_name_.c_str());
57  rmdir(dir_name_.c_str()); // Will only delete directory if no config files are left over
58  }
63  std::string getFileName() const { return file_name_; }
64  private:
65  std::string dir_name_;
66  std::string file_name_;
67  };
68 
72  class SharedMemoryEventManager : public SharedMemoryManager
73  {
74  public:
75  typedef RawEvent::run_id_t run_id_t;
76  typedef RawEvent::subrun_id_t subrun_id_t;
77  typedef Fragment::sequence_id_t sequence_id_t;
78  typedef std::map<sequence_id_t, RawEvent_ptr> EventMap;
79 
80  struct Config
81  {
82  fhicl::Atom<size_t> max_event_size_bytes{ fhicl::Name{ "max_event_size_bytes"}, fhicl::Comment{"Maximum event size (all Fragments), in bytes"} };
83  fhicl::Atom<size_t> stale_buffer_timeout_usec{ fhicl::Name{ "stale_buffer_timeout_usec"}, fhicl::Comment{"Maximum amount of time elapsed before a buffer is marked as abandoned. Time is reset each time an operation is performed on the buffer."}, 5000000 };
84  fhicl::Atom<bool> overwrite_mode{ fhicl::Name{ "overwrite_mode"}, fhicl::Comment{"Whether buffers are allowed to be overwritten when safe (state == Full or Reading)"}, false };
85  fhicl::Atom<bool> restart_crashed_art_processes{ fhicl::Name{"restart_crashed_art_processes"}, fhicl::Comment{"Whether to automatically restart art processes that fail for any reason"}, true };
86  fhicl::Atom<uint32_t> shared_memory_key{ fhicl::Name{ "shared_memory_key"}, fhicl::Comment{"Key to use for shared memory access"}, 0xBEE70000 + getpid() };
87  fhicl::Atom<size_t> buffer_count{ fhicl::Name{ "buffer_count"}, fhicl::Comment{"Number of events in the Shared Memory (incomplete + pending art)"} };
88  fhicl::Atom<size_t> max_fragment_size_bytes{ fhicl::Name{ "max_fragment_size_bytes"}, fhicl::Comment{" Maximum Fragment size, in bytes"} };
89  fhicl::Atom<size_t> event_queue_wait_time{ fhicl::Name{ "event_queue_wait_time"}, fhicl::Comment{"Amount of time (in seconds) an event can exist in shared memory before being released to art. Used as input to default parameter of \"stale_buffer_timeout_usec\"."}, 5 };
90  fhicl::Atom<bool> broadcast_mode{ fhicl::Name{ "broadcast_mode"}, fhicl::Comment{"When true, buffers are not marked Empty when read, but return to Full state. Buffers are overwritten in order received."}, false };
91  fhicl::Atom<size_t> art_analyzer_count{ fhicl::Name{ "art_analyzer_count"}, fhicl::Comment{"Number of art procceses to start"}, 1 };
92  fhicl::Atom<size_t> expected_fragments_per_event{ fhicl::Name{ "expected_fragments_per_event"}, fhicl::Comment{"Number of Fragments to expect per event"} };
93  fhicl::Atom<bool> update_run_ids_on_new_fragment{ fhicl::Name{ "update_run_ids_on_new_fragment"}, fhicl::Comment{"Whether the run and subrun ID of an event should be updated whenever a Fragment is added."}, true };
94  fhicl::Atom<bool> send_init_fragments{ fhicl::Name{ "send_init_fragments"}, fhicl::Comment{"Whether Init Fragments are expected to be sent to art. If true, a Warning message is printed when an Init Fragment is requested but none are available."}, true };
95  fhicl::Atom<int> incomplete_event_report_interval_ms{ fhicl::Name{ "incomplete_event_report_interval_ms"}, fhicl::Comment{"Interval at which an incomplete event report should be written"}, -1 };
96  fhicl::Atom<int> fragment_broadcast_timeout_ms{ fhicl::Name{ "fragment_broadcast_timeout_ms"}, fhicl::Comment{"Amount of time broadcast fragments should live in the broadcast shared memory segment"}, 3000 };
97  fhicl::Atom<double> minimum_art_lifetime_s{ fhicl::Name{ "minimum_art_lifetime_s"}, fhicl::Comment{"Amount of time that an art process should run to not be considered \"DOA\""}, 2.0 };
98  fhicl::Atom<size_t> expected_art_event_processing_time_us{ fhicl::Name{ "expected_art_event_processing_time_us"}, fhicl::Comment{"During shutdown, SMEM will wait for this amount of time while it is checking that the art threads are done reading buffers."}, 100000 };
99  fhicl::Atom<uint32_t> broadcast_shared_memory_key{ fhicl::Name{ "broadcast_shared_memory_key"}, fhicl::Comment{""}, 0xCEE70000 + getpid() };
100  fhicl::Atom<size_t> broadcast_buffer_count{ fhicl::Name{ "broadcast_buffer_count"}, fhicl::Comment{"Buffers in the broadcast shared memory segment"}, 10 };
101  fhicl::Atom<size_t> broadcast_buffer_size{ fhicl::Name{ "broadcast_buffer_size"}, fhicl::Comment{"Size of the buffers in the broadcast shared memory segment"}, 0x100000 };
102  fhicl::Atom<bool> use_art{ fhicl::Name{ "use_art"}, fhicl::Comment{"Whether to start and manage art threads (Sets art_analyzer count to 0 and overwrite_mode to true when false)"}, true };
103 
104  fhicl::TableFragment<artdaq::RequestSender::Config> requestSenderConfig;
105  };
106 #if MESSAGEFACILITY_HEX_VERSION >= 0x20103
107  using Parameters = fhicl::WrappedTable<Config>;
108 #endif
109 
143  SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset);
147  virtual ~SharedMemoryEventManager();
148 
149  private:
156  bool AddFragment(detail::RawFragmentHeader frag, void* dataPtr);
157 
158  public:
166  bool AddFragment(FragmentPtr frag, size_t timeout_usec, FragmentPtr& outfrag);
167 
174  RawDataType* WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable = false);
175 
180  void DoneWritingFragment(detail::RawFragmentHeader frag);
181 
186  size_t GetIncompleteEventCount() { return active_buffers_.size(); }
187 
192  size_t GetPendingEventCount() { return pending_buffers_.size(); }
193 
198  size_t GetLockedBufferCount() { return GetBuffersOwnedByManager().size(); }
199 
204  size_t GetArtEventCount() { return subrun_event_count_; }
205 
212  size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type = Fragment::InvalidFragmentType);
213 
220  size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type = Fragment::InvalidFragmentType);
221 
225  void RunArt(std::shared_ptr<art_config_file> config_file, pid_t& pid_out);
229  void StartArt();
230 
236  pid_t StartArtProcess(fhicl::ParameterSet pset);
237 
242  void ShutdownArtProcesses(std::set<pid_t> pids);
243 
250  void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun = 0, int n_art_processes = -1);
251 
261  bool endOfData();
262 
267  void startRun(run_id_t runID);
268 
272  void startSubrun();
273 
278  run_id_t runID() const { return run_id_; }
279 
284  subrun_id_t subrunID() const { return subrun_id_; }
285 
290  bool endRun();
291 
296  bool endSubrun();
297 
302  void rolloverSubrun(sequence_id_t boundary);
303 
307  void sendMetrics();
308 
313  void setRequestMode(detail::RequestMessageMode mode) { if (requests_) requests_->SetRequestMode(mode); }
314 
319  void setOverwrite(bool overwrite) { overwrite_mode_ = overwrite; }
320 
324  void SetInitFragment(FragmentPtr frag);
325 
330  uint32_t GetBroadcastKey() { return broadcasts_.GetKey(); }
331 
336  RawDataType* GetDroppedDataAddress() { return dropped_data_->dataBegin(); }
337 
338  private:
339  size_t num_art_processes_;
340  size_t const num_fragments_per_event_;
341  size_t const queue_size_;
342  run_id_t run_id_;
343  subrun_id_t subrun_id_;
344  sequence_id_t subrun_rollover_event_;
345  sequence_id_t last_released_event_;
346 
347  std::set<int> active_buffers_;
348  std::set<int> pending_buffers_;
349  std::unordered_map<Fragment::sequence_id_t, size_t> released_incomplete_events_;
350 
351  bool update_run_ids_;
352  bool overwrite_mode_;
353  bool send_init_fragments_;
354  bool running_;
355 
356  std::unordered_map<int, std::atomic<int>> buffer_writes_pending_;
357  std::unordered_map<int, std::mutex> buffer_mutexes_;
358  static std::mutex sequence_id_mutex_;
359 
360  int incomplete_event_report_interval_ms_;
361  std::chrono::steady_clock::time_point last_incomplete_event_report_time_;
362  int broadcast_timeout_ms_;
363 
364  std::atomic<int> run_event_count_;
365  std::atomic<int> run_incomplete_event_count_;
366  std::atomic<int> subrun_event_count_;
367  std::atomic<int> subrun_incomplete_event_count_;
368 
369  std::set<pid_t> art_processes_;
370  std::atomic<bool> restart_art_;
371  bool always_restart_art_;
372  fhicl::ParameterSet current_art_pset_;
373  std::shared_ptr<art_config_file> current_art_config_file_;
374  double minimum_art_lifetime_s_;
375  size_t art_event_processing_time_us_;
376 
377  std::unique_ptr<RequestSender> requests_;
378  fhicl::ParameterSet data_pset_;
379 
380  FragmentPtr init_fragment_;
381  FragmentPtr dropped_data_;
382 
383  bool broadcastFragment_(FragmentPtr frag, FragmentPtr& outFrag);
384 
385  detail::RawEventHeader* getEventHeader_(int buffer);
386 
387  int getBufferForSequenceID_(Fragment::sequence_id_t seqID, bool create_new, Fragment::timestamp_t timestamp = Fragment::InvalidTimestamp);
388  bool hasFragments_(int buffer);
389  void complete_buffer_(int buffer);
390  bool bufferComparator(int bufA, int bufB);
391  void check_pending_buffers_(std::unique_lock<std::mutex> const& lock = std::unique_lock<std::mutex>(sequence_id_mutex_));
392 
393  void send_init_frag_();
394  SharedMemoryManager broadcasts_;
395  };
396  }
397 
398 #endif //ARTDAQ_DAQRATE_SHAREDMEMORYEVENTMANAGER_HH
art_config_file wraps a temporary file used to configure art
void RunArt(std::shared_ptr< art_config_file > config_file, pid_t &pid_out)
Run an art instance, recording the return codes and restarting it until the end flag is raised...
size_t GetLockedBufferCount()
Returns the number of buffers currently owned by this manager.
virtual ~SharedMemoryEventManager()
SharedMemoryEventManager Destructor.
The SharedMemoryEventManager is a SharedMemoryManager which tracks events as they are built...
Fragment::sequence_id_t sequence_id_t
Copy Fragment::sequence_id_t into local scope.
void ReconfigureArt(fhicl::ParameterSet art_pset, run_id_t newRun=0, int n_art_processes=-1)
Restart all art processes, using the given fhicl code to configure the new art processes.
pid_t StartArtProcess(fhicl::ParameterSet pset)
Start one art process.
RawDataType * WriteFragmentHeader(detail::RawFragmentHeader frag, bool dropIfNoBuffersAvailable=false)
Get a pointer to a reserved memory area for the given Fragment header.
void setRequestMode(detail::RequestMessageMode mode)
Set the RequestMessageMode for all outgoing data requests.
size_t GetArtEventCount()
Returns the number of events sent to art this subrun.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
RawDataType * GetDroppedDataAddress()
Gets the address of the "dropped data" fragment. Used for testing.
size_t GetFragmentCount(Fragment::sequence_id_t seqID, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in an event.
size_t GetPendingEventCount()
Returns the number of events which are complete but waiting on lower sequenced events to finish...
void StartArt()
Start all the art processes.
run_id_t runID() const
Get the current Run number.
void ShutdownArtProcesses(std::set< pid_t > pids)
Shutdown a set of art processes.
void SetInitFragment(FragmentPtr frag)
Set the stored Init fragment, if one has not already been set.
subrun_id_t subrunID() const
Get the current subrun number.
void rolloverSubrun(sequence_id_t boundary)
Rollover the subrun after the specified event.
std::map< sequence_id_t, RawEvent_ptr > EventMap
An EventMap is a map of RawEvent_ptr objects, keyed by sequence ID.
RequestMessageMode
Mode used to indicate current run conditions to the request receiver.
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
size_t GetIncompleteEventCount()
Returns the number of buffers which contain data but are not yet complete.
SharedMemoryEventManager(fhicl::ParameterSet pset, fhicl::ParameterSet art_pset)
SharedMemoryEventManager Constructor.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.
bool endRun()
Send an EndOfRunFragment to the art thread.
void setOverwrite(bool overwrite)
Set the overwrite flag (non-reliable data transfer) for the Shared Memory.
std::string getFileName() const
Get the path of the temporary file.
The artdaq::Globals class contains several variables which are useful across the entire artdaq system...
Definition: Globals.hh:31
void DoneWritingFragment(detail::RawFragmentHeader frag)
Used to indicate that the given Fragment is now completely in the buffer. Will check for buffer compl...
uint32_t GetBroadcastKey()
Gets the shared memory key of the broadcast SharedMemoryManager.
bool endOfData()
Indicate that the end of input has been reached to the art processes.
RawEvent::subrun_id_t subrun_id_t
Copy RawEvent::subrun_id_t into local scope.
art_config_file(fhicl::ParameterSet ps)
art_config_file Constructor
void startRun(run_id_t runID)
Start a Run.
size_t GetFragmentCountInBuffer(int buffer, Fragment::type_t type=Fragment::InvalidFragmentType)
Get the count of Fragments of a given type in a buffer.