artdaq  v3_10_03
FragmentWatcher_module.cc
1 // Class: FragmentWatcher
3 // Module Type: analyzer
4 // File: FragmentWatcher_module.cc
5 // Description: Collects and reports statistics on missing and empty fragments
6 //
7 // The model that is followed here is to publish to the metrics system
8 // the full history of what has happened so far. In that way, each update
9 // is self-contained. So, the map of fragment IDs that have missing or
10 // empty fragments will contain the total number of events in which each
11 // fragment ID was missing or empty.
12 //
13 // TRACE messages, though, contain a mix of per-event and overall results.
14 // To enable TLVL_TRACE messages that have overall results (for debugging),
15 // use 'tonM -n <appname>_FragmentWatcher 4'.
17 
18 #define TRACE_NAME (app_name + "_FragmentWatcher").c_str()
19 #include "artdaq/DAQdata/Globals.hh"
20 
21 #include "art/Framework/Core/EDAnalyzer.h"
22 #include "art/Framework/Core/ModuleMacros.h"
23 #include "art/Framework/Principal/Event.h"
24 #include "art/Framework/Principal/Handle.h"
25 
26 #include "artdaq-core/Data/ContainerFragment.hh"
27 #include "artdaq-core/Data/Fragment.hh"
28 
29 #include <bitset>
30 #include <iostream>
31 #include <map>
32 
33 #define TLVL_BAD_FRAGMENTS TLVL_WARNING
34 #define TLVL_EVENT_SUMMARY TLVL_TRACE
35 #define TLVL_EXPECTED_FRAGIDS 5
36 #define TLVL_BASIC_MODE 6
37 #define TLVL_FRACTIONAL_MODE 7
38 
39 namespace artdaq {
40 class FragmentWatcher;
41 }
42 
46 class artdaq::FragmentWatcher : public art::EDAnalyzer
47 {
48 public:
58  explicit FragmentWatcher(fhicl::ParameterSet const& pset);
62  ~FragmentWatcher() override;
63 
68  void analyze(art::Event const& evt) override;
69 
70 private:
71  FragmentWatcher(FragmentWatcher const&) = delete;
72  FragmentWatcher(FragmentWatcher&&) = delete;
73  FragmentWatcher& operator=(FragmentWatcher const&) = delete;
74  FragmentWatcher& operator=(FragmentWatcher&&) = delete;
75 
76  std::bitset<3> mode_bitset_;
77  int metrics_reporting_level_;
78 
79  int events_processed_;
80  std::set<int> expected_fragmentID_list_;
81 
82  int events_with_missing_fragments_;
83  int events_with_empty_fragments_;
84 
85  int events_with_10pct_missing_fragments_;
86  int events_with_10pct_empty_fragments_;
87  int events_with_50pct_missing_fragments_;
88  int events_with_50pct_empty_fragments_;
89 
90  std::map<int, int> missing_fragments_by_fragmentID_;
91  std::map<int, int> empty_fragments_by_fragmentID_;
92 
93  const int BASIC_COUNTS_MODE = 0;
94  const int FRACTIONAL_COUNTS_MODE = 1;
95  const int DETAILED_COUNTS_MODE = 2;
96 };
97 
98 artdaq::FragmentWatcher::FragmentWatcher(fhicl::ParameterSet const& pset)
99  : EDAnalyzer(pset)
100  , mode_bitset_(std::bitset<3>(pset.get<int>("mode_bitmask", 0x1)))
101  , metrics_reporting_level_(pset.get<int>("metrics_reporting_level", 1))
102  , events_processed_(0)
103  , expected_fragmentID_list_()
104  , events_with_missing_fragments_(0)
105  , events_with_empty_fragments_(0)
106  , events_with_10pct_missing_fragments_(0)
107  , events_with_10pct_empty_fragments_(0)
108  , events_with_50pct_missing_fragments_(0)
109  , events_with_50pct_empty_fragments_(0)
110  , missing_fragments_by_fragmentID_()
111  , empty_fragments_by_fragmentID_()
112 {
113 }
114 
116 {
117 }
118 
119 void artdaq::FragmentWatcher::analyze(art::Event const& evt)
120 {
121  events_processed_++;
122 
123  // get all the artdaq fragment collections in the event.
124  std::vector<art::Handle<std::vector<artdaq::Fragment>>> fragmentHandles;
125 #if ART_HEX_VERSION < 0x30900
126  evt.getManyByType(fragmentHandles);
127 #else
128  fragmentHandles = evt.getMany<std::vector<artdaq::Fragment>>();
129 #endif
130 
131  std::set<int> missing_fragmentID_list_this_event(expected_fragmentID_list_);
132  // Check for missing Fragment IDs, updating the master list as necessary
133  for (auto const& hndl : fragmentHandles)
134  {
135  for (auto const& fragment : *hndl)
136  {
137  int fragID = fragment.fragmentID();
138  TLOG(TLVL_EXPECTED_FRAGIDS) << "Inserting fragment ID " << fragID << " into the list of expected_fragmentIDs.";
139  expected_fragmentID_list_.insert(fragID);
140  missing_fragmentID_list_this_event.erase(fragID);
141  }
142  }
143 
144  // track the number of missing fragments by fragment ID
145  for (int const& fragID : missing_fragmentID_list_this_event)
146  {
147  if (missing_fragments_by_fragmentID_.count(fragID) == 0)
148  {
149  missing_fragments_by_fragmentID_[fragID] = 1;
150  }
151  else
152  {
153  missing_fragments_by_fragmentID_[fragID] += 1;
154  }
155  }
156 
157  // check if this event has any Empty fragments
158  int empty_fragment_count_this_event = 0;
159  std::set<int> empty_fragmentID_list_this_event;
160  for (auto const& hndl : fragmentHandles)
161  {
162  std::string instance_name = hndl.provenance()->productInstanceName();
163  std::size_t found = instance_name.find("Empty");
164  if (found != std::string::npos)
165  {
166  empty_fragment_count_this_event += hndl->size();
167 
168  // track the number of empty fragments by fragment ID
169  for (auto const& fragment : *hndl)
170  {
171  int fragID = fragment.fragmentID();
172  if (empty_fragments_by_fragmentID_.count(fragID) == 0)
173  {
174  empty_fragments_by_fragmentID_[fragID] = 1;
175  }
176  else
177  {
178  empty_fragments_by_fragmentID_[fragID] += 1;
179  }
180  empty_fragmentID_list_this_event.insert(fragID);
181  }
182  }
183  }
184 
185  // common metric reporting for multiple modes
186  if (metricMan != nullptr && (mode_bitset_.test(BASIC_COUNTS_MODE) || mode_bitset_.test(FRACTIONAL_COUNTS_MODE)))
187  {
188  metricMan->sendMetric("EventsProcessed", events_processed_, "events", metrics_reporting_level_,
189  artdaq::MetricMode::LastPoint);
190  }
191 
192  size_t missing_fragment_count_this_event = missing_fragmentID_list_this_event.size();
193  size_t total_fragments_this_event = expected_fragmentID_list_.size() - missing_fragment_count_this_event;
194  TLOG(TLVL_EVENT_SUMMARY) << "Event " << evt.event() << ": this event: total_fragments=" << total_fragments_this_event
195  << ", missing_fragments=" << missing_fragment_count_this_event << ", empty_fragments="
196  << empty_fragment_count_this_event << " (" << events_processed_ << " events processed)";
197  // log TRACE message if there are missing fragments
198  if (missing_fragment_count_this_event > 0)
199  {
200  std::ostringstream oss;
201  bool firstLoop = true;
202  for (auto const& fragID : missing_fragmentID_list_this_event)
203  {
204  if (!firstLoop) { oss << ", "; }
205  oss << fragID;
206  firstLoop = false;
207  }
208  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
209  << ", fragmentIDs for " << missing_fragment_count_this_event << " missing_fragments: " << oss.str();
210  }
211  // log TRACE message if there are empty fragments
212  if (!empty_fragmentID_list_this_event.empty())
213  {
214  std::ostringstream oss;
215  bool firstLoop = true;
216  for (auto const& fragID : empty_fragmentID_list_this_event)
217  {
218  if (!firstLoop) { oss << ", "; }
219  oss << fragID;
220  firstLoop = false;
221  }
222  TLOG(TLVL_BAD_FRAGMENTS) << "Event " << evt.event() << ": total_fragments=" << total_fragments_this_event
223  << ", fragmentIDs for " << empty_fragment_count_this_event << " empty_fragments: " << oss.str();
224  }
225 
226  // reporting for the BASIC_COUNTS_MODE
227  if (metricMan != nullptr && mode_bitset_.test(BASIC_COUNTS_MODE))
228  {
229  if (missing_fragment_count_this_event > 0) { ++events_with_missing_fragments_; }
230  if (empty_fragment_count_this_event > 0) { ++events_with_empty_fragments_; }
231 
232  metricMan->sendMetric("EventsWithMissingFragments", events_with_missing_fragments_, "events",
233  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
234  metricMan->sendMetric("EventsWithEmptyFragments", events_with_empty_fragments_, "events",
235  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
236 
237  TLOG(TLVL_BASIC_MODE) << "Event " << evt.event() << ": events_with_missing_fragments=" << events_with_missing_fragments_
238  << ", events_with_empty_fragments=" << events_with_empty_fragments_;
239  }
240 
241  // reporting for the FRACTIONAL_COUNTS_MODE
242  if (metricMan != nullptr && mode_bitset_.test(FRACTIONAL_COUNTS_MODE))
243  {
244  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
245  {
246  ++events_with_10pct_missing_fragments_;
247  }
248  if (((static_cast<double>(missing_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
249  {
250  ++events_with_50pct_missing_fragments_;
251  }
252 
253  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 10.0)
254  {
255  ++events_with_10pct_empty_fragments_;
256  }
257  if (((static_cast<double>(empty_fragment_count_this_event) * 100.0) / static_cast<double>(expected_fragmentID_list_.size())) >= 50.0)
258  {
259  ++events_with_50pct_empty_fragments_;
260  }
261 
262  metricMan->sendMetric("EventsWith10PctMissingFragments", events_with_10pct_missing_fragments_, "events",
263  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
264  metricMan->sendMetric("EventsWith50PctMissingFragments", events_with_50pct_missing_fragments_, "events",
265  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
266 
267  metricMan->sendMetric("EventsWith10PctEmptyFragments", events_with_10pct_empty_fragments_, "events",
268  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
269  metricMan->sendMetric("EventsWith50PctEmptyFragments", events_with_50pct_empty_fragments_, "events",
270  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
271 
272  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_10pct_missing_fragments=" << events_with_10pct_missing_fragments_
273  << ", events_with_10pct_empty_fragments=" << events_with_10pct_empty_fragments_;
274  TLOG(TLVL_FRACTIONAL_MODE) << "Event " << evt.event() << ": events_with_50pct_missing_fragments=" << events_with_50pct_missing_fragments_
275  << ", events_with_50pct_empty_fragments=" << events_with_50pct_empty_fragments_;
276  }
277 
278  // reporting for the DETAILED_COUNTS_MODE
279  if (metricMan != nullptr && mode_bitset_.test(DETAILED_COUNTS_MODE))
280  {
281  // only send an update when the missing or empty fragment counts, by FragmentID, changed,
282  // as indicated by a non-zero number of missing or empty fragments in this event
283  if (missing_fragment_count_this_event > 0 || empty_fragment_count_this_event > 0)
284  {
285  std::ostringstream oss;
286  oss << "<eventbuilder_snapshot app_name=\"" << app_name << "\"><events_processed>" << events_processed_
287  << "</events_processed>";
288  oss << "<missing_fragment_counts>";
289  for (auto const& mapIter : missing_fragments_by_fragmentID_)
290  {
291  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
292  }
293  oss << "</missing_fragment_counts>";
294  oss << "<empty_fragment_counts>";
295  for (auto const& mapIter : empty_fragments_by_fragmentID_)
296  {
297  oss << "<count fragment_id=" << mapIter.first << ">" << mapIter.second << "</count>";
298  }
299  oss << "</empty_fragment_counts>";
300  oss << "</eventbuilder_snapshot>";
301 
302  metricMan->sendMetric("EmptyFragmentSnapshot", oss.str(), "xml_string",
303  metrics_reporting_level_, artdaq::MetricMode::LastPoint);
304  }
305  }
306 
307 #if 0
308  ==================================================== =
309 
310  event_builder_snapshot : {
311  name: "EventBuilder5"
312  timestamp : "20190408T124433"
313  events_built : 105
314 
315  sender_list : ["felix501", "felix501", "ssp101", "ssp102"]
316  valid_fragment_counts : [105, 105, 102, 104]
317  empty_fragment_counts : [0, 0, 2, 0]
318  missing_fragment_counts : [0, 0, 1, 1]
319  }
320 
321  ==================================================== =
322 
323  <event_builder_snapshot name = "EventBuilder5">
324  < timestamp>20190408T124433< / timestamp>
325  < events_built>105 < / events_built
326 
327  <sender_list>
328  <sender index = 0>felix501< / sender>
329  <sender index = 1>felix502< / sender>
330  <sender index = 2>ssp101< / sender>
331  <sender index = 3>ssp102< / sender>
332  < / sender_list>
333 
334  <valid_fragment_counts>
335  < count index = 0>105 < / count >
336  < count index = 1>105 < / count >
337  < count index = 2>102 < / count >
338  < count index = 3>104 < / count >
339  < / valid_fragment_counts>
340 
341  <empty_fragment_counts>
342  < count index = 2>2 < / count >
343  < / empty_fragment_counts>
344 
345  <missing_fragment_counts>
346  < count index = 2>1 < / count >
347  < count index = 3>1 < / count >
348  < / missing_fragment_counts>
349  < / event_builder_snapshot>
350 
351  ==================================================== =
352 #endif
353 }
354 
355 DEFINE_ART_MODULE(artdaq::FragmentWatcher) // NOLINT(performance-unnecessary-value-param)
FragmentWatcher(fhicl::ParameterSet const &pset)
FragmentWatcher Constructor.
An art::EDAnalyzer module which checks events for certain error conditions (missing fragments...
void analyze(art::Event const &evt) override
Analyze each event, using the configured mode bitmask.
~FragmentWatcher() override
Virtual Destructor. Shuts down MetricManager if one is present.