artdaq  v3_09_06a
FragmentWatcher_module.cc
1 // Class: FragmentWatcher
3 // Module Type: analyzer
4 // File: FragmentWatcher_module.cc
5 // Description: Collects and reports statistics on missing and empty fragments
6 //
7 // The model that is followed here is to publish to the metrics system
8 // the full history of what has happened so far. In that way, each update
9 // is self-contained. So, the map of fragment IDs that have missing or
10 // empty fragments will contain the total number of events in which each
11 // fragment ID was missing or empty.
12 //
13 // TRACE messages, though, contain a mix of per-event and overall results.
14 // To enable TLVL_TRACE messages that have overall results (for debugging),
15 // use 'tonM -n <appname>_FragmentWatcher 4'.
17 
18 #define TRACE_NAME (app_name + "_FragmentWatcher").c_str()
19 #include "artdaq/DAQdata/Globals.hh"
20 
21 #include "art/Framework/Core/EDAnalyzer.h"
22 #include "art/Framework/Core/ModuleMacros.h"
23 #include "art/Framework/Principal/Event.h"
24 #include "art/Framework/Principal/Handle.h"
25 
26 #include "artdaq-core/Data/ContainerFragment.hh"
27 #include "artdaq-core/Data/Fragment.hh"
28 
29 #include <bitset>
30 #include <iostream>
31 #include <map>
32 
33 #define TLVL_BAD_FRAGMENTS TLVL_WARNING
34 #define TLVL_EVENT_SUMMARY TLVL_TRACE
35 #define TLVL_EXPECTED_FRAGIDS 5
36 #define TLVL_BASIC_MODE 6
37 #define TLVL_FRACTIONAL_MODE 7
38 
// Forward-declare the module class inside its namespace so that the class
// can be defined below using the qualified name artdaq::FragmentWatcher.
namespace artdaq {
class FragmentWatcher;
}  // namespace artdaq
42 
46 class artdaq::FragmentWatcher : public art::EDAnalyzer
47 {
48 public:
58  explicit FragmentWatcher(fhicl::ParameterSet const& pset);
62  ~FragmentWatcher() override;
63 
68  void analyze(art::Event const& evt) override;
69 
70 private:
71  FragmentWatcher(FragmentWatcher const&) = delete;
72  FragmentWatcher(FragmentWatcher&&) = delete;
73  FragmentWatcher& operator=(FragmentWatcher const&) = delete;
74  FragmentWatcher& operator=(FragmentWatcher&&) = delete;
75 
76  std::bitset<3> mode_bitset_;
77  int metrics_reporting_level_;
78 
79  int events_processed_;
80  std::set<int> expected_fragmentID_list_;
81 
82  int events_with_missing_fragments_;
83  int events_with_empty_fragments_;
84 
85  int events_with_10pct_missing_fragments_;
86  int events_with_10pct_empty_fragments_;
87  int events_with_50pct_missing_fragments_;
88  int events_with_50pct_empty_fragments_;
89 
90  std::map<int, int> missing_fragments_by_fragmentID_;
91  std::map<int, int> empty_fragments_by_fragmentID_;
92 
93  const int BASIC_COUNTS_MODE = 0;
94  const int FRACTIONAL_COUNTS_MODE = 1;
95  const int DETAILED_COUNTS_MODE = 2;
96 };
97 
98 artdaq::FragmentWatcher::FragmentWatcher(fhicl::ParameterSet const& pset)
99  : EDAnalyzer(pset)
100  , mode_bitset_(std::bitset<3>(pset.get<int>("mode_bitmask", 0x1)))
101  , metrics_reporting_level_(pset.get<int>("metrics_reporting_level", 1))
102  , events_processed_(0)
103  , expected_fragmentID_list_()
104  , events_with_missing_fragments_(0)
105  , events_with_empty_fragments_(0)
106  , events_with_10pct_missing_fragments_(0)
107  , events_with_10pct_empty_fragments_(0)
108  , events_with_50pct_missing_fragments_(0)
109  , events_with_50pct_empty_fragments_(0)
110  , missing_fragments_by_fragmentID_()
111  , empty_fragments_by_fragmentID_()
112 {
113 }
114 
116 {
117 }
118 
119 void artdaq::FragmentWatcher::analyze(art::Event const& evt)
120 {
121  events_processed_++;
122 
123  // get all the artdaq fragment collections in the event.
124  std::vector<art::Handle<std::vector<artdaq::Fragment> > > fragmentHandles;
125  evt.getManyByType(fragmentHandles);
126 
127  std::set<int> missing_fragmentID_list_this_event(expected_fragmentID_list_);
128  // Check for missing Fragment IDs, updating the master list as necessary
129  for (auto const& hndl : fragmentHandles)
130  {
131  for (auto const& fragment : *hndl)
132  {
133  int fragID = fragment.fragmentID();
134  TLOG(TLVL_EXPECTED_FRAGIDS) << "Inserting fragment ID " << fragID << " into the list of expected_fragmentIDs.";
135  expected_fragmentID_list_.insert(fragID);
136  missing_fragmentID_list_this_event.erase(fragID);
137  }
138  }
139 
140  // track the number of missing fragments by fragment ID
141  for (int const& fragID : missing_fragmentID_list_this_event)
142  {
143  if (missing_fragments_by_fragmentID_.count(fragID) == 0)
144  {
145  missing_fragments_by_fragmentID_[fragID] = 1;
146  }
147  else
148  {
149  missing_fragments_by_fragmentID_[fragID] += 1;
150  }
151  }
152 
153  // check if this event has any Empty fragments
154  int empty_fragment_count_this_event = 0;
155  std::set<int> empty_fragmentID_list_this_event;
156  for (auto const& hndl : fragmentHandles)
157  {
158  std::string instance_name = hndl.provenance()->productInstanceName();
159  std::size_t found = instance_name.find("Empty");
160  if (found != std::string::npos)
161  {
162  empty_fragment_count_this_event += hndl->size();
163 
164  // track the number of empty fragments by fragment ID
165  for (auto const& fragment : *hndl)
166  {
167  int fragID = fragment.fragmentID();
168  if (empty_fragments_by_fragmentID_.count(fragID) == 0)
169  {
170  empty_fragments_by_fragmentID_[fragID] = 1;
171  }
172  else
173  {
174  empty_fragments_by_fragmentID_[fragID] += 1;
175  }
176  empty_fragmentID_list_this_event.insert(fragID);
177  }
178  }
179  }
180 
181  // common metric reporting for multiple modes
182  if (metricMan != nullptr && (mode_bitset_.test(BASIC_COUNTS_MODE) || mode_bitset_.test(FRACTIONAL_COUNTS_MODE)))
183  {
184  metricMan->sendMetric("EventsProcessed", events_processed_, "events", metrics_reporting_level_,
185  artdaq::MetricMode::LastPoint);
186  }
187 
188  size_t missing_fragment_count_this_event = missing_fragmentID_list_this_event.size();
189  size_t total_fragments_this_event = expected_fragmentID_list_.size() - missing_fragment_count_this_event;
190  TLOG(TLVL_EVENT_SUMMARY) << "Event " << evt.event() << ": this event: total_fragments=" << total_fragments_this_event
191  << ", missing_fragments=" << missing_fragment_count_this_event << ", empty_fragments="
192  << empty_fragment_count_this_event << " (" << events_processed_ << " events processed)";
193  // log TRACE message if there are missing fragments
194  if (missing_fragment_count_this_event > 0)
195  {
196  std::ostringstream oss;
197  bool firstLoop = true;
198  for (auto const& fragID : missing_fragmentID_list_this_event)
199  {
200  if (!firstLoop) { oss << ", "; }
201  oss << fragID;
202  firstLoop = false;
203  }
204  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
205  << ", fragmentIDs for " << missing_fragment_count_this_event << " missing_fragments: " << oss.str();
206  }
207  // log TRACE message if there are empty fragments
208  if (!empty_fragmentID_list_this_event.empty())
209  {
210  std::ostringstream oss;
211  bool firstLoop = true;
212  for (auto const& fragID : empty_fragmentID_list_this_event)
213  {
214  if (!firstLoop) { oss << ", "; }
215  oss << fragID;
216  firstLoop = false;
217  }
218  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
219  << ", fragmentIDs for " << empty_fragment_count_this_event << " empty_fragments: " << oss.str();
220  }
221 
222  // reporting for the BASIC_COUNTS_MODE
223  if (metricMan != nullptr && mode_bitset_.test(BASIC_COUNTS_MODE))
224  {
225  if (missing_fragment_count_this_event > 0) { ++events_with_missing_fragments_; }
226  if (empty_fragment_count_this_event > 0) { ++events_with_empty_fragments_; }
227 
228  metricMan->sendMetric("EventsWithMissingFragments", events_with_missing_fragments_, "events",
229  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
230  metricMan->sendMetric("EventsWithEmptyFragments", events_with_empty_fragments_, "events",
231  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
232 
233  TLOG(TLVL_BASIC_MODE) << "Event " << evt.event() << ": events_with_missing_fragments=" << events_with_missing_fragments_
234  << ", events_with_empty_fragments=" << events_with_empty_fragments_;
235  }
236 
237  // reporting for the FRACTIONAL_COUNTS_MODE
238  if (metricMan != nullptr && mode_bitset_.test(FRACTIONAL_COUNTS_MODE))
239  {
240  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
241  {
242  ++events_with_10pct_missing_fragments_;
243  }
244  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
245  {
246  ++events_with_50pct_missing_fragments_;
247  }
248 
249  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
250  {
251  ++events_with_10pct_empty_fragments_;
252  }
253  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
254  {
255  ++events_with_50pct_empty_fragments_;
256  }
257 
258  metricMan->sendMetric("EventsWith10PctMissingFragments", events_with_10pct_missing_fragments_, "events",
259  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
260  metricMan->sendMetric("EventsWith50PctMissingFragments", events_with_50pct_missing_fragments_, "events",
261  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
262 
263  metricMan->sendMetric("EventsWith10PctEmptyFragments", events_with_10pct_empty_fragments_, "events",
264  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
265  metricMan->sendMetric("EventsWith50PctEmptyFragments", events_with_50pct_empty_fragments_, "events",
266  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
267 
268  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_10pct_missing_fragments=" << events_with_10pct_missing_fragments_
269  << ", events_with_10pct_empty_fragments=" << events_with_10pct_empty_fragments_;
270  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_50pct_missing_fragments=" << events_with_50pct_missing_fragments_
271  << ", events_with_50pct_empty_fragments=" << events_with_50pct_empty_fragments_;
272  }
273 
274  // reporting for the DETAILED_COUNTS_MODE
275  if (metricMan != nullptr && mode_bitset_.test(DETAILED_COUNTS_MODE))
276  {
277  // only send an update when the missing or empty fragment counts, by FragmentID, changed,
278  // as indicated by a non-zero number of missing or empty fragments in this event
279  if (missing_fragment_count_this_event > 0 || empty_fragment_count_this_event > 0)
280  {
281  std::ostringstream oss;
282  oss << "<eventbuilder_snapshot app_name=\"" << app_name << "\"><events_processed>" << events_processed_
283  << "</events_processed>";
284  oss << "<missing_fragment_counts>";
285  for (auto const& mapIter : missing_fragments_by_fragmentID_)
286  {
287  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
288  }
289  oss << "</missing_fragment_counts>";
290  oss << "<empty_fragment_counts>";
291  for (auto const& mapIter : empty_fragments_by_fragmentID_)
292  {
293  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
294  }
295  oss << "</empty_fragment_counts>";
296  oss << "</eventbuilder_snapshot>";
297 
298  metricMan->sendMetric("EmptyFragmentSnapshot", oss.str(), "xml_string",
299  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
300  }
301  }
302 
303 #if 0
304  ==================================================== =
305 
306  event_builder_snapshot : {
307  name: "EventBuilder5"
308  timestamp : "20190408T124433"
309  events_built : 105
310 
311  sender_list : ["felix501", "felix501", "ssp101", "ssp102"]
312  valid_fragment_counts : [105, 105, 102, 104]
313  empty_fragment_counts : [0, 0, 2, 0]
314  missing_fragment_counts : [0, 0, 1, 1]
315  }
316 
317  ==================================================== =
318 
319  <event_builder_snapshot name = "EventBuilder5">
320  < timestamp>20190408T124433< / timestamp>
321  < events_built>105 < / events_built
322 
323  <sender_list>
324  <sender index = 0>felix501< / sender>
325  <sender index = 1>felix502< / sender>
326  <sender index = 2>ssp101< / sender>
327  <sender index = 3>ssp102< / sender>
328  < / sender_list>
329 
330  <valid_fragment_counts>
331  < count index = 0>105 < / count >
332  < count index = 1>105 < / count >
333  < count index = 2>102 < / count >
334  < count index = 3>104 < / count >
335  < / valid_fragment_counts>
336 
337  <empty_fragment_counts>
338  < count index = 2>2 < / count >
339  < / empty_fragment_counts>
340 
341  <missing_fragment_counts>
342  < count index = 2>1 < / count >
343  < count index = 3>1 < / count >
344  < / missing_fragment_counts>
345  < / event_builder_snapshot>
346 
347  ==================================================== =
348 #endif
349 }
350 
351 DEFINE_ART_MODULE(artdaq::FragmentWatcher) // NOLINT(performance-unnecessary-value-param)
FragmentWatcher(fhicl::ParameterSet const &pset)
FragmentWatcher Constructor.
An art::EDAnalyzer module which checks events for certain error conditions (missing fragments...
void analyze(art::Event const &evt) override
Analyze each event, using the configured mode bitmask.
~FragmentWatcher() override
Virtual Destructor. Shuts down MetricManager if one is present.