artdaq  v3_12_02
FragmentWatcher_module.cc
1 // Class: FragmentWatcher
3 // Module Type: analyzer
4 // File: FragmentWatcher_module.cc
5 // Description: Collects and reports statistics on missing and empty fragments
6 //
7 // The model that is followed here is to publish to the metrics system
8 // the full history of what has happened so far. In that way, each update
9 // is self-contained. So, the map of fragment IDs that have missing or
10 // empty fragments will contain the total number of events in which each
11 // fragment ID was missing or empty.
12 //
13 // TRACE messages, though, contain a mix of per-event and overall results.
14 // To enable TLVL_TRACE messages that have overall results (for debugging),
15 // use 'tonM -n <appname>_FragmentWatcher 4'.
17 
18 #include "TRACE/tracemf.h"
19 #include "artdaq/DAQdata/Globals.hh"
20 #define TRACE_NAME (app_name + "_FragmentWatcher").c_str()
21 
22 #include "artdaq-core/Data/ContainerFragment.hh"
23 #include "artdaq-core/Data/Fragment.hh"
24 
25 #include "art/Framework/Core/EDAnalyzer.h"
26 #include "art/Framework/Core/ModuleMacros.h"
27 #include "art/Framework/Principal/Event.h"
28 #include "art/Framework/Principal/Handle.h"
29 
30 #include <bitset>
31 #include <iostream>
32 #include <map>
33 
34 #define TLVL_BAD_FRAGMENTS TLVL_WARNING
35 #define TLVL_EVENT_SUMMARY TLVL_TRACE
36 #define TLVL_EXPECTED_FRAGIDS 5
37 #define TLVL_BASIC_MODE 6
38 #define TLVL_FRACTIONAL_MODE 7
39 
40 namespace artdaq {
41 class FragmentWatcher;
42 }
43 
47 class artdaq::FragmentWatcher : public art::EDAnalyzer
48 {
49 public:
59  explicit FragmentWatcher(fhicl::ParameterSet const& pset);
63  ~FragmentWatcher() override;
64 
69  void analyze(art::Event const& evt) override;
70 
71 private:
72  FragmentWatcher(FragmentWatcher const&) = delete;
73  FragmentWatcher(FragmentWatcher&&) = delete;
74  FragmentWatcher& operator=(FragmentWatcher const&) = delete;
75  FragmentWatcher& operator=(FragmentWatcher&&) = delete;
76 
77  std::bitset<3> mode_bitset_;
78  int metrics_reporting_level_;
79 
80  int events_processed_;
81  std::set<int> expected_fragmentID_list_;
82 
83  int events_with_missing_fragments_;
84  int events_with_empty_fragments_;
85 
86  int events_with_10pct_missing_fragments_;
87  int events_with_10pct_empty_fragments_;
88  int events_with_50pct_missing_fragments_;
89  int events_with_50pct_empty_fragments_;
90 
91  std::map<int, int> missing_fragments_by_fragmentID_;
92  std::map<int, int> empty_fragments_by_fragmentID_;
93 
94  const int BASIC_COUNTS_MODE = 0;
95  const int FRACTIONAL_COUNTS_MODE = 1;
96  const int DETAILED_COUNTS_MODE = 2;
97 };
98 
99 artdaq::FragmentWatcher::FragmentWatcher(fhicl::ParameterSet const& pset)
100  : EDAnalyzer(pset)
101  , mode_bitset_(std::bitset<3>(pset.get<int>("mode_bitmask", 0x1)))
102  , metrics_reporting_level_(pset.get<int>("metrics_reporting_level", 1))
103  , events_processed_(0)
104  , expected_fragmentID_list_()
105  , events_with_missing_fragments_(0)
106  , events_with_empty_fragments_(0)
107  , events_with_10pct_missing_fragments_(0)
108  , events_with_10pct_empty_fragments_(0)
109  , events_with_50pct_missing_fragments_(0)
110  , events_with_50pct_empty_fragments_(0)
111  , missing_fragments_by_fragmentID_()
112  , empty_fragments_by_fragmentID_()
113 {
114 }
115 
117 {
118 }
119 
120 void artdaq::FragmentWatcher::analyze(art::Event const& evt)
121 {
122  events_processed_++;
123 
124  // get all the artdaq fragment collections in the event.
125  std::vector<art::Handle<std::vector<artdaq::Fragment>>> fragmentHandles;
126  fragmentHandles = evt.getMany<std::vector<artdaq::Fragment>>();
127 
128  std::set<int> missing_fragmentID_list_this_event(expected_fragmentID_list_);
129  // Check for missing Fragment IDs, updating the master list as necessary
130  for (auto const& hndl : fragmentHandles)
131  {
132  for (auto const& fragment : *hndl)
133  {
134  int fragID = fragment.fragmentID();
135  TLOG(TLVL_EXPECTED_FRAGIDS) << "Inserting fragment ID " << fragID << " into the list of expected_fragmentIDs.";
136  expected_fragmentID_list_.insert(fragID);
137  missing_fragmentID_list_this_event.erase(fragID);
138  }
139  }
140 
141  // track the number of missing fragments by fragment ID
142  for (int const& fragID : missing_fragmentID_list_this_event)
143  {
144  if (missing_fragments_by_fragmentID_.count(fragID) == 0)
145  {
146  missing_fragments_by_fragmentID_[fragID] = 1;
147  }
148  else
149  {
150  missing_fragments_by_fragmentID_[fragID] += 1;
151  }
152  }
153 
154  // check if this event has any Empty fragments
155  int empty_fragment_count_this_event = 0;
156  std::set<int> empty_fragmentID_list_this_event;
157  for (auto const& hndl : fragmentHandles)
158  {
159  std::string instance_name = hndl.provenance()->productInstanceName();
160  std::size_t found = instance_name.find("Empty");
161  if (found != std::string::npos)
162  {
163  empty_fragment_count_this_event += hndl->size();
164 
165  // track the number of empty fragments by fragment ID
166  for (auto const& fragment : *hndl)
167  {
168  int fragID = fragment.fragmentID();
169  if (empty_fragments_by_fragmentID_.count(fragID) == 0)
170  {
171  empty_fragments_by_fragmentID_[fragID] = 1;
172  }
173  else
174  {
175  empty_fragments_by_fragmentID_[fragID] += 1;
176  }
177  empty_fragmentID_list_this_event.insert(fragID);
178  }
179  }
180  }
181 
182  // common metric reporting for multiple modes
183  if (metricMan != nullptr && (mode_bitset_.test(BASIC_COUNTS_MODE) || mode_bitset_.test(FRACTIONAL_COUNTS_MODE)))
184  {
185  metricMan->sendMetric("EventsProcessed", events_processed_, "events", metrics_reporting_level_,
186  artdaq::MetricMode::LastPoint);
187  }
188 
189  size_t missing_fragment_count_this_event = missing_fragmentID_list_this_event.size();
190  size_t total_fragments_this_event = expected_fragmentID_list_.size() - missing_fragment_count_this_event;
191  TLOG(TLVL_EVENT_SUMMARY) << "Event " << evt.event() << ": this event: total_fragments=" << total_fragments_this_event
192  << ", missing_fragments=" << missing_fragment_count_this_event << ", empty_fragments="
193  << empty_fragment_count_this_event << " (" << events_processed_ << " events processed)";
194  // log TRACE message if there are missing fragments
195  if (missing_fragment_count_this_event > 0)
196  {
197  std::ostringstream oss;
198  bool firstLoop = true;
199  for (auto const& fragID : missing_fragmentID_list_this_event)
200  {
201  if (!firstLoop) { oss << ", "; }
202  oss << fragID;
203  firstLoop = false;
204  }
205  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
206  << ", fragmentIDs for " << missing_fragment_count_this_event << " missing_fragments: " << oss.str();
207  }
208  // log TRACE message if there are empty fragments
209  if (!empty_fragmentID_list_this_event.empty())
210  {
211  std::ostringstream oss;
212  bool firstLoop = true;
213  for (auto const& fragID : empty_fragmentID_list_this_event)
214  {
215  if (!firstLoop) { oss << ", "; }
216  oss << fragID;
217  firstLoop = false;
218  }
219  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
220  << ", fragmentIDs for " << empty_fragment_count_this_event << " empty_fragments: " << oss.str();
221  }
222 
223  // reporting for the BASIC_COUNTS_MODE
224  if (metricMan != nullptr && mode_bitset_.test(BASIC_COUNTS_MODE))
225  {
226  if (missing_fragment_count_this_event > 0) { ++events_with_missing_fragments_; }
227  if (empty_fragment_count_this_event > 0) { ++events_with_empty_fragments_; }
228 
229  metricMan->sendMetric("EventsWithMissingFragments", events_with_missing_fragments_, "events",
230  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
231  metricMan->sendMetric("EventsWithEmptyFragments", events_with_empty_fragments_, "events",
232  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
233 
234  TLOG(TLVL_BASIC_MODE) << "Event " << evt.event() << ": events_with_missing_fragments=" << events_with_missing_fragments_
235  << ", events_with_empty_fragments=" << events_with_empty_fragments_;
236  }
237 
238  // reporting for the FRACTIONAL_COUNTS_MODE
239  if (metricMan != nullptr && mode_bitset_.test(FRACTIONAL_COUNTS_MODE))
240  {
241  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
242  {
243  ++events_with_10pct_missing_fragments_;
244  }
245  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
246  {
247  ++events_with_50pct_missing_fragments_;
248  }
249 
250  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
251  {
252  ++events_with_10pct_empty_fragments_;
253  }
254  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
255  {
256  ++events_with_50pct_empty_fragments_;
257  }
258 
259  metricMan->sendMetric("EventsWith10PctMissingFragments", events_with_10pct_missing_fragments_, "events",
260  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
261  metricMan->sendMetric("EventsWith50PctMissingFragments", events_with_50pct_missing_fragments_, "events",
262  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
263 
264  metricMan->sendMetric("EventsWith10PctEmptyFragments", events_with_10pct_empty_fragments_, "events",
265  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
266  metricMan->sendMetric("EventsWith50PctEmptyFragments", events_with_50pct_empty_fragments_, "events",
267  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
268 
269  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_10pct_missing_fragments=" << events_with_10pct_missing_fragments_
270  << ", events_with_10pct_empty_fragments=" << events_with_10pct_empty_fragments_;
271  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_50pct_missing_fragments=" << events_with_50pct_missing_fragments_
272  << ", events_with_50pct_empty_fragments=" << events_with_50pct_empty_fragments_;
273  }
274 
275  // reporting for the DETAILED_COUNTS_MODE
276  if (metricMan != nullptr && mode_bitset_.test(DETAILED_COUNTS_MODE))
277  {
278  // only send an update when the missing or empty fragment counts, by FragmentID, changed,
279  // as indicated by a non-zero number of missing or empty fragments in this event
280  if (missing_fragment_count_this_event > 0 || empty_fragment_count_this_event > 0)
281  {
282  std::ostringstream oss;
283  oss << "<eventbuilder_snapshot app_name=\"" << app_name << "\"><events_processed>" << events_processed_
284  << "</events_processed>";
285  oss << "<missing_fragment_counts>";
286  for (auto const& mapIter : missing_fragments_by_fragmentID_)
287  {
288  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
289  }
290  oss << "</missing_fragment_counts>";
291  oss << "<empty_fragment_counts>";
292  for (auto const& mapIter : empty_fragments_by_fragmentID_)
293  {
294  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
295  }
296  oss << "</empty_fragment_counts>";
297  oss << "</eventbuilder_snapshot>";
298 
299  metricMan->sendMetric("EmptyFragmentSnapshot", oss.str(), "xml_string",
300  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
301  }
302  }
303 
304 #if 0
305  ==================================================== =
306 
307  event_builder_snapshot : {
308  name: "EventBuilder5"
309  timestamp : "20190408T124433"
310  events_built : 105
311 
312  sender_list : ["felix501", "felix501", "ssp101", "ssp102"]
313  valid_fragment_counts : [105, 105, 102, 104]
314  empty_fragment_counts : [0, 0, 2, 0]
315  missing_fragment_counts : [0, 0, 1, 1]
316  }
317 
318  ==================================================== =
319 
320  <event_builder_snapshot name = "EventBuilder5">
321  < timestamp>20190408T124433< / timestamp>
322  < events_built>105 < / events_built
323 
324  <sender_list>
325  <sender index = 0>felix501< / sender>
326  <sender index = 1>felix502< / sender>
327  <sender index = 2>ssp101< / sender>
328  <sender index = 3>ssp102< / sender>
329  < / sender_list>
330 
331  <valid_fragment_counts>
332  < count index = 0>105 < / count >
333  < count index = 1>105 < / count >
334  < count index = 2>102 < / count >
335  < count index = 3>104 < / count >
336  < / valid_fragment_counts>
337 
338  <empty_fragment_counts>
339  < count index = 2>2 < / count >
340  < / empty_fragment_counts>
341 
342  <missing_fragment_counts>
343  < count index = 2>1 < / count >
344  < count index = 3>1 < / count >
345  < / missing_fragment_counts>
346  < / event_builder_snapshot>
347 
348  ==================================================== =
349 #endif
350 }
351 
352 DEFINE_ART_MODULE(artdaq::FragmentWatcher) // NOLINT(performance-unnecessary-value-param)
FragmentWatcher(fhicl::ParameterSet const &pset)
FragmentWatcher Constructor.
An art::EDAnalyzer module which checks events for certain error conditions (missing fragments and empty fragments).
void analyze(art::Event const &evt) override
Analyze each event, using the configured mode bitmask.
~FragmentWatcher() override
Virtual Destructor. Documented as shutting down MetricManager if one is present, although the definition shown in this listing has an empty body.