artdaq  3.12.07
FragmentWatcher_module.cc
1 // Class: FragmentWatcher
3 // Module Type: analyzer
4 // File: FragmentWatcher_module.cc
5 // Description: Collects and reports statistics on missing and empty fragments
6 //
7 // The model that is followed here is to publish to the metrics system
8 // the full history of what has happened so far. In that way, each update
9 // is self-contained. So, the map of fragment IDs that have missing or
10 // empty fragments will contain the total number of events in which each
11 // fragment ID was missing or empty.
12 //
13 // TRACE messages, though, contain a mix of per-event and overall results.
14 // To enable TLVL_TRACE messages that have overall results (for debugging),
15 // use 'tonM -n <appname>_FragmentWatcher 4'.
17 
18 #include "TRACE/tracemf.h"
19 #include "artdaq/DAQdata/Globals.hh"
20 #define TRACE_NAME (app_name + "_FragmentWatcher").c_str()
21 
22 #include "artdaq-core/Data/ContainerFragment.hh"
23 #include "artdaq-core/Data/Fragment.hh"
24 
25 #include "art/Framework/Core/EDAnalyzer.h"
26 #include "art/Framework/Core/ModuleMacros.h"
27 #include "art/Framework/Principal/Event.h"
28 #include "art/Framework/Principal/Handle.h"
29 
30 #include <bitset>
31 #include <iostream>
32 #include <map>
33 
// TRACE levels used by the TLOG statements in FragmentWatcher::analyze().
// Level for the per-event messages that list the IDs of missing or empty fragments.
34 #define TLVL_BAD_FRAGMENTS TLVL_WARNING
// Level for the one-line per-event summary (total / missing / empty fragment counts).
35 #define TLVL_EVENT_SUMMARY TLVL_TRACE
// Level for the message logged when a new fragment ID is added to the expected list.
36 #define TLVL_EXPECTED_FRAGIDS 5
// Level for the running totals reported in BASIC_COUNTS_MODE.
37 #define TLVL_BASIC_MODE 6
// Level for the running totals reported in FRACTIONAL_COUNTS_MODE.
38 #define TLVL_FRACTIONAL_MODE 7
39 
40 namespace artdaq {
41 class FragmentWatcher;
42 }
43 
47 class artdaq::FragmentWatcher : public art::EDAnalyzer
48 {
49 public:
59  explicit FragmentWatcher(fhicl::ParameterSet const& pset);
63  ~FragmentWatcher() override;
64 
69  void analyze(art::Event const& evt) override;
70 
71 private:
72  FragmentWatcher(FragmentWatcher const&) = delete;
73  FragmentWatcher(FragmentWatcher&&) = delete;
74  FragmentWatcher& operator=(FragmentWatcher const&) = delete;
75  FragmentWatcher& operator=(FragmentWatcher&&) = delete;
76 
77  std::bitset<3> mode_bitset_;
78  int metrics_reporting_level_;
79 
80  int events_processed_;
81  std::set<int> expected_fragmentID_list_;
82 
83  int events_with_missing_fragments_;
84  int events_with_empty_fragments_;
85 
86  int events_with_10pct_missing_fragments_;
87  int events_with_10pct_empty_fragments_;
88  int events_with_50pct_missing_fragments_;
89  int events_with_50pct_empty_fragments_;
90 
91  std::map<int, int> missing_fragments_by_fragmentID_;
92  std::map<int, int> empty_fragments_by_fragmentID_;
93 
94  const int BASIC_COUNTS_MODE = 0;
95  const int FRACTIONAL_COUNTS_MODE = 1;
96  const int DETAILED_COUNTS_MODE = 2;
97 };
98 
99 artdaq::FragmentWatcher::FragmentWatcher(fhicl::ParameterSet const& pset)
100  : EDAnalyzer(pset)
101  , mode_bitset_(std::bitset<3>(pset.get<int>("mode_bitmask", 0x1)))
102  , metrics_reporting_level_(pset.get<int>("metrics_reporting_level", 1))
103  , events_processed_(0)
104  , expected_fragmentID_list_()
105  , events_with_missing_fragments_(0)
106  , events_with_empty_fragments_(0)
107  , events_with_10pct_missing_fragments_(0)
108  , events_with_10pct_empty_fragments_(0)
109  , events_with_50pct_missing_fragments_(0)
110  , events_with_50pct_empty_fragments_(0)
111  , missing_fragments_by_fragmentID_()
112  , empty_fragments_by_fragmentID_()
113 {
114  auto ids = pset.get<std::vector<int>>("fragment_ids", {});
115  for (auto& id : ids)
116  {
117  expected_fragmentID_list_.insert(id);
118  }
119 }
120 
122 {
123 }
124 
125 void artdaq::FragmentWatcher::analyze(art::Event const& evt)
126 {
127  events_processed_++;
128 
129  // get all the artdaq fragment collections in the event.
130  std::vector<art::Handle<std::vector<artdaq::Fragment>>> fragmentHandles;
131  fragmentHandles = evt.getMany<std::vector<artdaq::Fragment>>();
132 
133  std::set<int> missing_fragmentID_list_this_event(expected_fragmentID_list_);
134  // Check for missing Fragment IDs, updating the master list as necessary
135  for (auto const& hndl : fragmentHandles)
136  {
137  for (auto const& fragment : *hndl)
138  {
139  int fragID = fragment.fragmentID();
140  if(!expected_fragmentID_list_.count(fragID)) {
141  TLOG(TLVL_EXPECTED_FRAGIDS) << "Inserting fragment ID " << fragID << " into the list of expected_fragmentIDs.";
142  expected_fragmentID_list_.insert(fragID);
143  }
144  missing_fragmentID_list_this_event.erase(fragID);
145  }
146  }
147 
148  // track the number of missing fragments by fragment ID
149  for (int const& fragID : missing_fragmentID_list_this_event)
150  {
151  if (missing_fragments_by_fragmentID_.count(fragID) == 0)
152  {
153  missing_fragments_by_fragmentID_[fragID] = 1;
154  }
155  else
156  {
157  missing_fragments_by_fragmentID_[fragID] += 1;
158  }
159  }
160 
161  // check if this event has any Empty fragments
162  int empty_fragment_count_this_event = 0;
163  std::set<int> empty_fragmentID_list_this_event;
164  for (auto const& hndl : fragmentHandles)
165  {
166  std::string instance_name = hndl.provenance()->productInstanceName();
167  std::size_t found = instance_name.find("Empty");
168  if (found != std::string::npos)
169  {
170  empty_fragment_count_this_event += hndl->size();
171 
172  // track the number of empty fragments by fragment ID
173  for (auto const& fragment : *hndl)
174  {
175  int fragID = fragment.fragmentID();
176  if (empty_fragments_by_fragmentID_.count(fragID) == 0)
177  {
178  empty_fragments_by_fragmentID_[fragID] = 1;
179  }
180  else
181  {
182  empty_fragments_by_fragmentID_[fragID] += 1;
183  }
184  empty_fragmentID_list_this_event.insert(fragID);
185  }
186  }
187  }
188 
189  // common metric reporting for multiple modes
190  if (metricMan != nullptr && (mode_bitset_.test(BASIC_COUNTS_MODE) || mode_bitset_.test(FRACTIONAL_COUNTS_MODE)))
191  {
192  metricMan->sendMetric("EventsProcessed", events_processed_, "events", metrics_reporting_level_,
193  artdaq::MetricMode::LastPoint);
194  }
195 
196  size_t missing_fragment_count_this_event = missing_fragmentID_list_this_event.size();
197  size_t total_fragments_this_event = expected_fragmentID_list_.size() - missing_fragment_count_this_event;
198  TLOG(TLVL_EVENT_SUMMARY) << "Event " << evt.event() << ": this event: total_fragments=" << total_fragments_this_event
199  << ", missing_fragments=" << missing_fragment_count_this_event << ", empty_fragments="
200  << empty_fragment_count_this_event << " (" << events_processed_ << " events processed)";
201  // log TRACE message if there are missing fragments
202  if (missing_fragment_count_this_event > 0)
203  {
204  std::ostringstream oss;
205  bool firstLoop = true;
206  for (auto const& fragID : missing_fragmentID_list_this_event)
207  {
208  if (!firstLoop) { oss << ", "; }
209  oss << fragID;
210  firstLoop = false;
211  }
212  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
213  << ", fragmentIDs for " << missing_fragment_count_this_event << " missing_fragments: " << oss.str();
214  }
215  // log TRACE message if there are empty fragments
216  if (!empty_fragmentID_list_this_event.empty())
217  {
218  std::ostringstream oss;
219  bool firstLoop = true;
220  for (auto const& fragID : empty_fragmentID_list_this_event)
221  {
222  if (!firstLoop) { oss << ", "; }
223  oss << fragID;
224  firstLoop = false;
225  }
226  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
227  << ", fragmentIDs for " << empty_fragment_count_this_event << " empty_fragments: " << oss.str();
228  }
229 
230  // reporting for the BASIC_COUNTS_MODE
231  if (metricMan != nullptr && mode_bitset_.test(BASIC_COUNTS_MODE))
232  {
233  if (missing_fragment_count_this_event > 0) { ++events_with_missing_fragments_; }
234  if (empty_fragment_count_this_event > 0) { ++events_with_empty_fragments_; }
235 
236  metricMan->sendMetric("EventsWithMissingFragments", events_with_missing_fragments_, "events",
237  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
238  metricMan->sendMetric("EventsWithEmptyFragments", events_with_empty_fragments_, "events",
239  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
240 
241  TLOG(TLVL_BASIC_MODE) << "Event " << evt.event() << ": events_with_missing_fragments=" << events_with_missing_fragments_
242  << ", events_with_empty_fragments=" << events_with_empty_fragments_;
243  }
244 
245  // reporting for the FRACTIONAL_COUNTS_MODE
246  if (metricMan != nullptr && mode_bitset_.test(FRACTIONAL_COUNTS_MODE))
247  {
248  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
249  {
250  ++events_with_10pct_missing_fragments_;
251  }
252  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
253  {
254  ++events_with_50pct_missing_fragments_;
255  }
256 
257  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
258  {
259  ++events_with_10pct_empty_fragments_;
260  }
261  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
262  {
263  ++events_with_50pct_empty_fragments_;
264  }
265 
266  metricMan->sendMetric("EventsWith10PctMissingFragments", events_with_10pct_missing_fragments_, "events",
267  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
268  metricMan->sendMetric("EventsWith50PctMissingFragments", events_with_50pct_missing_fragments_, "events",
269  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
270 
271  metricMan->sendMetric("EventsWith10PctEmptyFragments", events_with_10pct_empty_fragments_, "events",
272  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
273  metricMan->sendMetric("EventsWith50PctEmptyFragments", events_with_50pct_empty_fragments_, "events",
274  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
275 
276  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_10pct_missing_fragments=" << events_with_10pct_missing_fragments_
277  << ", events_with_10pct_empty_fragments=" << events_with_10pct_empty_fragments_;
278  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_50pct_missing_fragments=" << events_with_50pct_missing_fragments_
279  << ", events_with_50pct_empty_fragments=" << events_with_50pct_empty_fragments_;
280  }
281 
282  // reporting for the DETAILED_COUNTS_MODE
283  if (metricMan != nullptr && mode_bitset_.test(DETAILED_COUNTS_MODE))
284  {
285  // only send an update when the missing or empty fragment counts, by FragmentID, changed,
286  // as indicated by a non-zero number of missing or empty fragments in this event
287  if (missing_fragment_count_this_event > 0 || empty_fragment_count_this_event > 0)
288  {
289  std::ostringstream oss;
290  oss << "<eventbuilder_snapshot app_name=\"" << app_name << "\"><events_processed>" << events_processed_
291  << "</events_processed>";
292  oss << "<missing_fragment_counts>";
293  for (auto const& mapIter : missing_fragments_by_fragmentID_)
294  {
295  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
296  }
297  oss << "</missing_fragment_counts>";
298  oss << "<empty_fragment_counts>";
299  for (auto const& mapIter : empty_fragments_by_fragmentID_)
300  {
301  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
302  }
303  oss << "</empty_fragment_counts>";
304  oss << "</eventbuilder_snapshot>";
305 
306  metricMan->sendMetric("EmptyFragmentSnapshot", oss.str(), "xml_string",
307  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
308  }
309  }
310 
311 #if 0
312  ==================================================== =
313 
314  event_builder_snapshot : {
315  name: "EventBuilder5"
316  timestamp : "20190408T124433"
317  events_built : 105
318 
319  sender_list : ["felix501", "felix501", "ssp101", "ssp102"]
320  valid_fragment_counts : [105, 105, 102, 104]
321  empty_fragment_counts : [0, 0, 2, 0]
322  missing_fragment_counts : [0, 0, 1, 1]
323  }
324 
325  ==================================================== =
326 
327  <event_builder_snapshot name = "EventBuilder5">
328  < timestamp>20190408T124433< / timestamp>
329  < events_built>105 < / events_built
330 
331  <sender_list>
332  <sender index = 0>felix501< / sender>
333  <sender index = 1>felix502< / sender>
334  <sender index = 2>ssp101< / sender>
335  <sender index = 3>ssp102< / sender>
336  < / sender_list>
337 
338  <valid_fragment_counts>
339  < count index = 0>105 < / count >
340  < count index = 1>105 < / count >
341  < count index = 2>102 < / count >
342  < count index = 3>104 < / count >
343  < / valid_fragment_counts>
344 
345  <empty_fragment_counts>
346  < count index = 2>2 < / count >
347  < / empty_fragment_counts>
348 
349  <missing_fragment_counts>
350  < count index = 2>1 < / count >
351  < count index = 3>1 < / count >
352  < / missing_fragment_counts>
353  < / event_builder_snapshot>
354 
355  ==================================================== =
356 #endif
357 }
358 
// Register FragmentWatcher with the art framework as a module.
359 DEFINE_ART_MODULE(artdaq::FragmentWatcher) // NOLINT(performance-unnecessary-value-param)
FragmentWatcher(fhicl::ParameterSet const &pset)
FragmentWatcher Constructor.
An art::EDAnalyzer module which checks events for certain error conditions (missing fragments...
void analyze(art::Event const &evt) override
Analyze each event, using the configured mode bitmask.
~FragmentWatcher() override
Virtual Destructor. Shuts down MetricManager if one is present.