// artdaq v3_10_02
// RootDAQOutFile.cc
1 #include "artdaq/ArtModules/RootDAQOutput-s81/RootDAQOutFile.h"
2 // vim: set sw=2 expandtab :
3 
4 #include "art/Framework/Core/OutputFileGranularity.h"
5 #include "art/Framework/IO/ClosingCriteria.h"
6 #include "art/Framework/IO/FileStatsCollector.h"
7 #include "art/Framework/Principal/EventPrincipal.h"
8 #include "art/Framework/Principal/ResultsPrincipal.h"
9 #include "art/Framework/Principal/RunPrincipal.h"
10 #include "art/Framework/Principal/SubRunPrincipal.h"
11 #include "art/Framework/Services/System/DatabaseConnection.h"
12 #include "art/Persistency/Provenance/ProcessHistoryRegistry.h"
13 #include "art/Version/GetReleaseVersion.h"
14 #include "art_root_io/DropMetaData.h"
15 #include "art_root_io/GetFileFormatEra.h"
16 #include "art_root_io/GetFileFormatVersion.h"
17 #include "art_root_io/RootDB/SQLErrMsg.h"
18 #include "art_root_io/RootDB/TKeyVFSOpenPolicy.h"
19 #include "art_root_io/RootFileBlock.h"
20 #include "art_root_io/checkDictionaries.h"
21 #include "art_root_io/detail/KeptProvenance.h"
22 #include "art_root_io/detail/getObjectRequireDict.h"
23 #include "artdaq/DAQdata/Globals.hh"
24 #include "boost/date_time/posix_time/posix_time.hpp"
25 #include "canvas/Persistency/Provenance/BranchChildren.h"
26 #include "canvas/Persistency/Provenance/BranchType.h"
27 #include "canvas/Persistency/Provenance/EventAuxiliary.h"
28 #include "canvas/Persistency/Provenance/EventID.h"
29 #include "canvas/Persistency/Provenance/FileFormatVersion.h"
30 #include "canvas/Persistency/Provenance/History.h"
31 #include "canvas/Persistency/Provenance/ParameterSetBlob.h"
32 #include "canvas/Persistency/Provenance/Parentage.h"
33 #include "canvas/Persistency/Provenance/ParentageRegistry.h"
34 #include "canvas/Persistency/Provenance/ProcessHistoryID.h"
35 #include "canvas/Persistency/Provenance/ProductStatus.h"
36 #include "canvas/Persistency/Provenance/ResultsAuxiliary.h"
37 #include "canvas/Persistency/Provenance/RunAuxiliary.h"
38 #include "canvas/Persistency/Provenance/SubRunAuxiliary.h"
39 #include "canvas/Persistency/Provenance/rootNames.h"
40 #include "canvas/Utilities/Exception.h"
41 #include "canvas_root_io/Utilities/DictionaryChecker.h"
42 #include "cetlib/canonical_string.h"
43 #include "cetlib/container_algorithms.h"
44 #include "cetlib/exempt_ptr.h"
45 #include "cetlib/sqlite/Ntuple.h"
46 #include "cetlib/sqlite/Transaction.h"
47 #include "cetlib/sqlite/create_table.h"
48 #include "cetlib/sqlite/exec.h"
49 #include "cetlib/sqlite/insert.h"
50 #include "fhiclcpp/ParameterSet.h"
51 #include "fhiclcpp/ParameterSetID.h"
52 #include "fhiclcpp/ParameterSetRegistry.h"
53 #include "tracemf.h" // TLOG
54 #define TRACE_NAME (app_name + "_RootDAQOutFile").c_str()
55 #include "hep_concurrency/RecursiveMutex.h"
56 
57 #include "Rtypes.h"
58 #include "TBranchElement.h"
59 #include "TClass.h"
60 #include "TFile.h"
61 #include "TTree.h"
62 
63 #include <fcntl.h> // posix_fadvise POSIX_FADV_DONTNEED
64 #include <sys/sysinfo.h> // sysinfo(sysinfo*)
65 #include <algorithm>
66 #include <utility>
67 #include <vector>
68 
69 using namespace cet;
70 using namespace std;
71 using namespace hep::concurrency;
72 
73 using art::BranchType;
75 using art::rootNames::metaBranchRootName;
76 
77 namespace {
78 
79 void create_table(sqlite3* const db,
80  string const& name,
81  vector<string> const& columns,
82  string const& suffix = {})
83 {
84  if (columns.empty())
85  {
86  throw art::Exception(art::errors::LogicError) // NOLINT(cert-err60-cpp)
87  << "Number of sqlite columns specified for table: " << name << '\n'
88  << "is zero.\n";
89  }
90  string ddl = "DROP TABLE IF EXISTS " + name +
91  "; "
92  "CREATE TABLE " +
93  name + "(" + columns.front();
94  for_each(columns.begin() + 1, columns.end(), [&ddl](auto const& col) {
95  ddl += "," + col;
96  });
97  ddl += ") ";
98  ddl += suffix;
99  ddl += ";";
100  sqlite::exec(db, ddl);
101 }
102 
103 void insert_eventRanges_row(sqlite3_stmt* stmt,
104  art::SubRunNumber_t const sr,
105  art::EventNumber_t const b,
106  art::EventNumber_t const e)
107 {
108  sqlite3_bind_int64(stmt, 1, sr);
109  sqlite3_bind_int64(stmt, 2, b);
110  sqlite3_bind_int64(stmt, 3, e);
111  sqlite3_step(stmt);
112  sqlite3_reset(stmt);
113 }
114 
115 void insert_rangeSets_eventSets_row(sqlite3_stmt* stmt,
116  unsigned const rsid,
117  unsigned const esid)
118 {
119  sqlite3_bind_int64(stmt, 1, rsid);
120  sqlite3_bind_int64(stmt, 2, esid);
121  sqlite3_step(stmt);
122  sqlite3_reset(stmt);
123 }
124 
// Insert a new row for run 'r' into the "<BranchType>RangeSets" table
// and return the sqlite ROWID assigned to it; that ROWID serves as
// the RangeSet ID stamped onto output products.
unsigned
getNewRangeSetID(sqlite3* db,
                 art::BranchType const bt,
                 art::RunNumber_t const r)
{
  sqlite::insert_into(db, art::BranchTypeToString(bt) + "RangeSets")
    .values(r);
  return sqlite3_last_insert_rowid(db);
}
134 
135 vector<unsigned>
136 getExistingRangeSetIDs(sqlite3* db, art::RangeSet const& rs)
137 {
138  vector<unsigned> rangeSetIDs;
139  cet::transform_all(rs, back_inserter(rangeSetIDs), [db](auto const& range) {
140  sqlite::query_result<unsigned> r;
141  r << sqlite::select("ROWID")
142  .from(db, "EventRanges")
143  .where("SubRun=" + to_string(range.subRun()) +
144  " AND "
145  "begin=" +
146  to_string(range.begin()) +
147  " AND "
148  "end=" +
149  to_string(range.end()));
150  return unique_value(r);
151  });
152  return rangeSetIDs;
153 }
154 
155 void insertIntoEventRanges(sqlite3* db, art::RangeSet const& rs)
156 {
157  sqlite::Transaction txn{db};
158  sqlite3_stmt* stmt{nullptr};
159  string const ddl{
160  "INSERT INTO EventRanges(SubRun, begin, end) "
161  "VALUES(?, ?, ?);"};
162  sqlite3_prepare_v2(db, ddl.c_str(), -1, &stmt, nullptr);
163  for (auto const& range : rs)
164  {
165  insert_eventRanges_row(stmt, range.subRun(), range.begin(), range.end());
166  }
167  sqlite3_finalize(stmt);
168  txn.commit();
169 }
170 
171 void insertIntoJoinTable(sqlite3* db,
172  art::BranchType const bt,
173  unsigned const rsID,
174  vector<unsigned> const& eventRangesIDs)
175 {
176  sqlite::Transaction txn{db};
177  sqlite3_stmt* stmt{nullptr};
178  string const ddl{
179  "INSERT INTO " + art::BranchTypeToString(bt) +
180  "RangeSets_EventRanges(RangeSetsID, EventRangesID) Values(?,?);"};
181  sqlite3_prepare_v2(db, ddl.c_str(), -1, &stmt, nullptr);
182  cet::for_all(eventRangesIDs, [stmt, rsID](auto const eventRangeID) {
183  insert_rangeSets_eventSets_row(stmt, rsID, eventRangeID);
184  });
185  sqlite3_finalize(stmt);
186  txn.commit();
187 }
188 
189 void maybeInvalidateRangeSet(BranchType const bt,
190  art::RangeSet const& principalRS,
191  art::RangeSet& productRS)
192 {
193  assert(principalRS.is_sorted());
194  assert(productRS.is_sorted());
195  if (!productRS.is_valid())
196  {
197  return;
198  }
199  if (bt == art::InRun && productRS.is_full_run())
200  {
201  return;
202  }
203  if (bt == art::InSubRun && productRS.is_full_subRun())
204  {
205  return;
206  }
207  if (productRS.ranges().empty())
208  {
209  return;
210  }
211  auto const r = productRS.run();
212  auto const& productFront = productRS.ranges().front();
213  if (!principalRS.contains(r, productFront.subRun(), productFront.begin()))
214  {
215  productRS = art::RangeSet::invalid();
216  }
217 }
218 
219 // The purpose of 'maybeInvalidateRangeSet' is to support the
220 // following situation. Suppose process 1 creates three files with
221 // one Run product each, all corresponding to the same Run. Let's
222 // call the individual Run product instances in the three separate
223 // files as A, B, and C. Now suppose that the three files serve as
224 // inputs to process 2, where a concatenation is being performed AND
225 // ALSO an output file switch. Process 2 results in two output
226 // files, and now, in process 3, we concatenate the outputs from
227 // process 2. The situation would look like this:
228 //
229 // Process 1: [A] [B] [C]
230 // \ / \ /
231 // Process 2: [A + B] [B + C]
232 // \ / \ /
233 // D=agg(A,B) | | E=agg(B,C)
234 // \ /
235 // Process 3: [D + E]
236 //
237 // Notice the complication in process 3: product 'B' will be
238 // aggregated twice: once with A, and once with C. Whenever the
239 // output from process 3 is read as input to another process, the
240 // fetched product will be equivalent to A+2B+C.
241 //
242 // To avoid this situation, we compare the RangeSet of the product
243 // with the RangeSet of the in-memory RunAuxiliary. If the
244 // beginning of B's RangeSet is not contained within the auxiliary's
245 // RangeSet, then a dummy product with an invalid RangeSet is
246 // written to disk. Instead of the diagram above, we have:
247 //
248 // Process 1: [A] [B] [C]
249 // \ / \ /
250 // Process 2: [A + B] [x + C]
251 // \ / \ /
252 // D=agg(A,B) | | E=agg(x,C)=C
253 // \ /
254 // Process 3: [D + E]
255 //
256 // where 'x' represent a dummy product. Upon aggregating D and E,
257 // we obtain the correctly formed A+B+C product.
258 template<BranchType BT>
259 art::RangeSet
260 getRangeSet(art::OutputHandle const& oh,
261  art::RangeSet const& principalRS,
262  bool const producedInThisProcess)
263 {
264  if constexpr (!art::detail::range_sets_supported(BT))
265  {
266  {
267  }
268  return art::RangeSet::invalid();
269  }
270 
271  auto rs = oh.isValid() ? oh.rangeOfValidity() : art::RangeSet::invalid();
272  // Because a user can specify (e.g.):
273  // r.put(move(myProd), art::runFragment(myRangeSet));
274  // products that are produced in this process can have valid, yet
275  // arbitrary RangeSets. We therefore never invalidate a RangeSet
276  // that corresponds to a product produced in this process.
277  //
278  // It is possible for a user to specify a RangeSet which does not
279  // correspond AT ALL to the in-memory auxiliary RangeSet. In that
280  // case, users should not expect to be able to retrieve products
281  // for which no corresponding events or sub-runs were processed.
282  if (!producedInThisProcess)
283  {
284  maybeInvalidateRangeSet(BT, principalRS, rs);
285  }
286  return rs;
287 }
288 
289 template<BranchType BT>
290 void setProductRangeSetID(art::RangeSet const& rs,
291  sqlite3* db,
292  art::EDProduct* product,
293  map<unsigned, unsigned>& checksumToIndexLookup)
294 {
295  if constexpr (!art::detail::range_sets_supported(BT))
296  {
297  {
298  }
299  return;
300  }
301 
302  if (!rs.is_valid())
303  { // Invalid range-sets not written to DB
304  return;
305  }
306  // Set range sets for SubRun and Run products
307  auto it = checksumToIndexLookup.find(rs.checksum());
308  if (it != checksumToIndexLookup.cend())
309  {
310  product->setRangeSetID(it->second);
311  }
312  else
313  {
314  unsigned const rsID = getNewRangeSetID(db, BT, rs.run());
315  product->setRangeSetID(rsID);
316  checksumToIndexLookup.emplace(rs.checksum(), rsID);
317  insertIntoEventRanges(db, rs);
318  auto const& eventRangesIDs = getExistingRangeSetIDs(db, rs);
319  insertIntoJoinTable(db, BT, rsID, eventRangesIDs);
320  }
321 }
322 
323 bool maxCriterionSpecified(art::ClosingCriteria const& cc)
324 {
325  auto fp = mem_fn(&art::ClosingCriteria::fileProperties);
326  return (fp(cc).nEvents() !=
327  art::ClosingCriteria::Defaults::unsigned_max()) ||
328  (fp(cc).nSubRuns() !=
329  art::ClosingCriteria::Defaults::unsigned_max()) ||
330  (fp(cc).nRuns() != art::ClosingCriteria::Defaults::unsigned_max()) ||
331  (fp(cc).size() != art::ClosingCriteria::Defaults::size_max()) ||
332  (fp(cc).age().count() !=
333  art::ClosingCriteria::Defaults::seconds_max());
334 }
335 
336 } // unnamed namespace
337 
338 namespace art {
339 
RootDAQOutFile::OutputItem::~OutputItem() = default;

// An OutputItem pairs a selected BranchDescription with the product
// pointer the output branch reads from; product_ starts out null and
// is assigned elsewhere when a product is staged for writing.
RootDAQOutFile::OutputItem::OutputItem(BranchDescription bd)
  : branchDescription_{std::move(bd)}, product_{nullptr}
{}

// Branch name of the described product.
string const&
RootDAQOutFile::OutputItem::branchName() const
{
  return branchDescription_.branchName();
}

// Ordering delegates to BranchDescription's ordering so OutputItems
// can live in ordered containers (see selectedOutputItemList_).
bool RootDAQOutFile::OutputItem::operator<(OutputItem const& rh) const
{
  return branchDescription_ < rh.branchDescription_;
}
356 
357 // Part of static interface.
358 bool RootDAQOutFile::shouldFastClone(bool const fastCloningSet,
359  bool const fastCloning,
360  bool const wantAllEvents,
361  ClosingCriteria const& cc)
362 {
363  bool result = fastCloning;
364  mf::LogInfo("FastCloning")
365  << "Initial fast cloning configuration "
366  << (fastCloningSet ? "(user-set): " : "(from default): ") << boolalpha
367  << fastCloning;
368  if (fastCloning && !wantAllEvents)
369  {
370  result = false;
371  mf::LogWarning("FastCloning")
372  << "Fast cloning deactivated due to presence of\n"
373  << "event selection configuration.";
374  }
375  if (fastCloning && maxCriterionSpecified(cc) &&
376  cc.granularity() < Granularity::InputFile)
377  {
378  result = false;
379  mf::LogWarning("FastCloning")
380  << "Fast cloning deactivated due to request to allow\n"
381  << "output file switching at an Event, SubRun, or Run boundary.";
382  }
383  return result;
384 }
385 
386 RootDAQOutFile::RootDAQOutFile(OutputModule* om,
387  string const& fileName,
388  ClosingCriteria const& fileSwitchCriteria,
389  int const compressionLevel,
390  unsigned freePercent,
391  unsigned freeMB,
392  int64_t const saveMemoryObjectThreshold,
393  int64_t const treeMaxVirtualSize,
394  int const splitLevel,
395  int const basketSize,
396  DropMetaData dropMetaData,
397  bool const dropMetaDataForDroppedData,
398  bool const fastCloningRequested)
399  : mutex_{"RootDAQOutFile::mutex_"}
400  , compressionLevel_{compressionLevel}
401  , freePercent_{freePercent}
402  , freeMB_{freeMB}
403  , saveMemoryObjectThreshold_{saveMemoryObjectThreshold}
404  , treeMaxVirtualSize_{treeMaxVirtualSize}
405  , splitLevel_{splitLevel}
406  , basketSize_{basketSize}
407  , dropMetaData_{dropMetaData}
408  , descriptionsToPersist_{{}}
409  , selectedOutputItemList_{{}}
410 {
411  om_ = om;
412  file_ = fileName;
413  fileSwitchCriteria_ = fileSwitchCriteria;
414  status_ = OutputFileStatus::Closed;
415  dropMetaDataForDroppedData_ = dropMetaDataForDroppedData;
416  fastCloningEnabledAtConstruction_ = fastCloningRequested;
417  wasFastCloned_ = false;
418  filePtr_.reset(
419  TFile::Open(file_.c_str(), "recreate", "", compressionLevel));
420  // Don't split metadata tree or event description tree
421  metaDataTree_ = RootOutputTree::makeTTree(
422  filePtr_.get(), rootNames::metaDataTreeName(), 0);
423  fileIndexTree_ = RootOutputTree::makeTTree(
424  filePtr_.get(), rootNames::fileIndexTreeName(), 0);
425  parentageTree_ = RootOutputTree::makeTTree(
426  filePtr_.get(), rootNames::parentageTreeName(), 0);
427  // Create the tree that will carry (event) History objects.
428  eventHistoryTree_ = RootOutputTree::makeTTree(
429  filePtr_.get(), rootNames::eventHistoryTreeName(), splitLevel);
430  if (eventHistoryTree_ == nullptr)
431  {
432  throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
433  << "Failed to create the tree for History objects\n";
434  }
435  pEventAux_ = nullptr;
436  pSubRunAux_ = nullptr;
437  pRunAux_ = nullptr;
438  pResultsAux_ = nullptr;
439  pEventProductProvenanceVector_ = &eventProductProvenanceVector_;
440  pSubRunProductProvenanceVector_ = &subRunProductProvenanceVector_;
441  pRunProductProvenanceVector_ = &runProductProvenanceVector_;
442  pResultsProductProvenanceVector_ = &resultsProductProvenanceVector_;
443  pHistory_ = new History;
444  if (eventHistoryTree_->Branch(rootNames::eventHistoryBranchName().c_str(),
445  &pHistory_,
446  basketSize,
447  0) == nullptr)
448  {
449  throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
450  << "Failed to create a branch for History in the output file\n";
451  }
452  delete pHistory_;
453  pHistory_ = nullptr;
454  treePointers_[0] =
455  make_unique<RootOutputTree>(filePtr_.get(),
456  InEvent,
457  pEventAux_,
458  pEventProductProvenanceVector_,
459  basketSize,
460  splitLevel,
461  treeMaxVirtualSize,
462  saveMemoryObjectThreshold);
463  treePointers_[1] =
464  make_unique<RootOutputTree>(filePtr_.get(),
465  InSubRun,
466  pSubRunAux_,
467  pSubRunProductProvenanceVector_,
468  basketSize,
469  splitLevel,
470  treeMaxVirtualSize,
471  saveMemoryObjectThreshold);
472  treePointers_[2] = make_unique<RootOutputTree>(filePtr_.get(),
473  InRun,
474  pRunAux_,
475  pRunProductProvenanceVector_,
476  basketSize,
477  splitLevel,
478  treeMaxVirtualSize,
479  saveMemoryObjectThreshold);
480  treePointers_[3] =
481  make_unique<RootOutputTree>(filePtr_.get(),
482  InResults,
483  pResultsAux_,
484  pResultsProductProvenanceVector_,
485  basketSize,
486  splitLevel,
487  treeMaxVirtualSize,
488  saveMemoryObjectThreshold);
489  dataTypeReported_ = false;
490  rootFileDB_.reset(
491  ServiceHandle<DatabaseConnection> {}->get<TKeyVFSOpenPolicy>(
492  "RootFileDB",
493  filePtr_.get(),
494  SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE));
495  subRunRSID_ = -1U;
496  runRSID_ = -1U;
497  beginTime_ = chrono::steady_clock::now();
498  // Check that dictionaries for the auxiliaries exist
499  root::DictionaryChecker checker;
500  checker.checkDictionaries<EventAuxiliary>();
501  checker.checkDictionaries<SubRunAuxiliary>();
502  checker.checkDictionaries<RunAuxiliary>();
503  checker.checkDictionaries<ResultsAuxiliary>();
504  checker.reportMissingDictionaries();
505 
506  createDatabaseTables();
507  TLOG(TLVL_DEBUG + 0) << "RootDAQOutFile ctor complete";
508 }
509 
510 art::RootDAQOutFile::~RootDAQOutFile()
511 {
512  struct sysinfo info;
513  int sts = sysinfo(&info);
514  auto free_percent = static_cast<unsigned>(info.freeram * 100 / info.totalram);
515  auto free_MB = static_cast<unsigned>(info.freeram * info.mem_unit >> 20); // round down (1024.9 => 1024 MB)
516  TRACE(TLVL_DEBUG + 0, "~RootDAQOutFile free %%%u %.1fMB (%u) buffers=%fGB mem_unit=%u", // NOLINT
517  free_percent, static_cast<float>(info.freeram * info.mem_unit / (1024 * 1024.0)),
518  free_MB, static_cast<float>(info.bufferram * info.mem_unit / (1024 * 1024 * 1024.0)), info.mem_unit);
519  if (free_percent < freePercent_ || free_MB < freeMB_)
520  {
521  TLOG(TLVL_DEBUG + 0) << "RootDAQOutFile Flush/DONTNEED";
522  filePtr_->Flush();
523  sts = posix_fadvise(filePtr_->GetFd(), 0, 0 /*len,0=all*/, POSIX_FADV_DONTNEED);
524  }
525  TLOG(TLVL_DEBUG + 0) << "~RootDAQOutFile complete sts=" << sts;
526 }
527 
528 void art::RootDAQOutFile::createDatabaseTables()
529 {
530  // Event ranges
531  create_table(*rootFileDB_,
532  "EventRanges",
533  {"SubRun INTEGER",
534  "begin INTEGER",
535  "end INTEGER",
536  "UNIQUE (SubRun,begin,end) ON CONFLICT IGNORE"});
537  // SubRun range sets
538  using namespace cet::sqlite;
539  create_table(*rootFileDB_, "SubRunRangeSets", column<int>{"Run"});
540  create_table(*rootFileDB_,
541  "SubRunRangeSets_EventRanges",
542  {"RangeSetsID INTEGER",
543  "EventRangesID INTEGER",
544  "PRIMARY KEY(RangeSetsID,EventRangesID)"},
545  "WITHOUT ROWID");
546  // Run range sets
547  create_table(*rootFileDB_, "RunRangeSets", column<int>{"Run"});
548  create_table(*rootFileDB_,
549  "RunRangeSets_EventRanges",
550  {"RangeSetsID INTEGER",
551  "EventRangesID INTEGER",
552  "PRIMARY KEY(RangeSetsID,EventRangesID)"},
553  "WITHOUT ROWID");
554 }
555 
// Record the current lifecycle state of this output file (under the
// file-wide recursive mutex, like all other member functions here).
void RootDAQOutFile::setFileStatus(OutputFileStatus const ofs)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  status_ = ofs;
}
561 
// Name of the file currently being written.
string const&
RootDAQOutFile::currentFileName() const
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  return file_;
}
568 
// Populate selectedOutputItemList_ from the output module's kept
// products (per branch type) and create an output branch for each
// surviving item.
void RootDAQOutFile::selectProducts()
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  auto selectProductsToWrite = [this](BranchType const bt) {
    auto& items = selectedOutputItemList_[bt]; // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
    for (auto const& pr : om_->keptProducts()[bt])
    {
      auto const& pd = pr.second;
      // Persist Results products only if they have been produced by
      // the current process.
      if (bt == InResults && !pd.produced())
      {
        continue;
      }
      checkDictionaries(pd);
      // Although the transient flag is already checked when
      // OutputModule::doSelectProducts is called, it can be flipped
      // to 'true' after the BranchDescription transients have been
      // fluffed, which happens during the checkDictionaries call.
      if (pd.transient())
      {
        continue;
      }
      items.emplace(pd);
    }
    // Each branch reads its data through the item's product_ pointer.
    for (auto const& val : items)
    {
      treePointers_[bt]->addOutputBranch(val.branchDescription_,
                                         val.product_);
    }
  };
  for_each_branch_type(selectProductsToWrite);
}
602 
// Prepare for a new input file: (re)create the output branches and
// decide whether the event tree can be fast-cloned (bulk-copied) from
// the input file rather than filled entry by entry.
void RootDAQOutFile::beginInputFile(RootFileBlock const* rfb,
                                    bool const fastCloneFromOutputModule)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  // FIXME: the logic here is nasty.
  bool shouldFastClone{fastCloningEnabledAtConstruction_ &&
                       fastCloneFromOutputModule && (rfb != nullptr)};
  // Create output branches, and then redo calculation to determine if
  // fast cloning should be done.
  selectProducts();
  if (shouldFastClone &&
      !treePointers_[InEvent]->checkSplitLevelAndBasketSize(rfb->tree()))
  {
    mf::LogWarning("FastCloning")
      << "Fast cloning deactivated for this input file due to "
      << "splitting level and/or basket size.";
    shouldFastClone = false;
  }
  else if ((rfb != nullptr) && rfb->tree() &&
           rfb->tree()->GetCurrentFile()->GetVersion() < 60001)
  {
    // This branch can also be reached when shouldFastClone is already
    // false; re-clearing it is harmless.
    mf::LogWarning("FastCloning")
      << "Fast cloning deactivated for this input file due to "
      << "ROOT version used to write it (< 6.00/01)\n"
         "having a different splitting policy.";
    shouldFastClone = false;
  }
  // shouldFastClone being true implies rfb != nullptr (see its
  // initialization), so the dereference below is safe.
  if (shouldFastClone && rfb->fileFormatVersion().value_ < 10)
  {
    mf::LogWarning("FastCloning")
      << "Fast cloning deactivated for this input file due to "
      << "reading in file that has a different ProductID schema.";
    shouldFastClone = false;
  }
  // NOTE(review): this condition looks unreachable -- shouldFastClone
  // can only be true when fastCloningEnabledAtConstruction_ is true,
  // so "reactivated" is never logged.  Verify intent.
  if (shouldFastClone && !fastCloningEnabledAtConstruction_)
  {
    mf::LogWarning("FastCloning")
      << "Fast cloning reactivated for this input file.";
  }
  treePointers_[InEvent]->beginInputFile(shouldFastClone);
  auto tree = ((rfb != nullptr) && rfb->tree()) ? rfb->tree() : nullptr;
  wasFastCloned_ = treePointers_[InEvent]->fastCloneTree(tree);
}
646 
// Bump the input-file counter in the file properties used by the
// file-switch (closing) criteria.
void RootDAQOutFile::incrementInputFileNumber()
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  fp_.update_inputFile();
}
652 
// When an input file closes, let every output tree re-synchronize its
// entry counts (relevant after fast cloning).
void RootDAQOutFile::respondToCloseInputFile(FileBlock const& /*unused*/)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  cet::for_all(treePointers_, [](auto const& p) { p->setEntries(); });
}
658 
659 bool RootDAQOutFile::requestsToCloseFile()
660 {
661  RecursiveMutexSentry sentry{mutex_, __func__};
662  using namespace chrono;
663  unsigned int constexpr oneK{1024u};
664  fp_.updateSize(filePtr_->GetSize() / oneK);
665  fp_.updateAge(duration_cast<seconds>(steady_clock::now() - beginTime_));
666  return fileSwitchCriteria_.should_close(fp_);
667 }
668 
// Write one event: fill the product/provenance branches, record the
// event's History (augmented with this module's selector config), and
// update the file index and file properties.
void RootDAQOutFile::writeOne(EventPrincipal const& e)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  TLOG(TLVL_TRACE) << "Start of RootDAQOutFile::writeOne";
  // Auxiliary branch.
  // Note: pEventAux_ must be set before calling fillBranches
  // since it gets written out in that routine.
  pEventAux_ = &e.eventAux();
  // Because getting the data may cause an exception to be
  // thrown we want to do that first before writing anything
  // to the file about this event.
  fillBranches<InEvent>(e, pEventProductProvenanceVector_);
  // History branch: fill from a local copy so we can append this
  // module's event-selection entry without mutating the principal.
  History historyForOutput{e.history()};
  historyForOutput.addEventSelectionEntry(om_->selectorConfig());
  pHistory_ = &historyForOutput;
  int sz = eventHistoryTree_->Fill();
  if (sz <= 0)
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to fill the History tree for event: " << e.eventID()
      << "\nTTree::Fill() returned " << sz << " bytes written." << endl;
  }
  // Add the dataType to the job report if it hasn't already been done
  // NOTE(review): 'dataType' is computed but never used afterwards --
  // it looks like a leftover from a removed job-report call; only the
  // dataTypeReported_ flag has a lasting effect.  Verify intent.
  if (!dataTypeReported_)
  {
    string dataType{"MC"};
    if (pEventAux_->isRealData())
    {
      dataType = "Data";
    }
    dataTypeReported_ = true;
  }
  // Re-point pHistory_ at the principal's history before
  // historyForOutput goes out of scope at the end of this function.
  pHistory_ = &e.history();
  // Add event to index
  fileIndex_.addEntry(pEventAux_->eventID(), fp_.eventEntryNumber());
  fp_.update_event();
  TLOG(TLVL_TRACE) << "End of RootDAQOutFile::writeOne";
}
708 
// Write the SubRun auxiliary and products, stamping the auxiliary
// with the previously stored range-set ID (subRunRSID_), and update
// the file index and file properties.
void RootDAQOutFile::writeSubRun(SubRunPrincipal const& sr)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  pSubRunAux_ = &sr.subRunAux();
  pSubRunAux_->setRangeSetID(subRunRSID_);
  fillBranches<InSubRun>(sr, pSubRunProductProvenanceVector_);
  fileIndex_.addEntry(EventID::invalidEvent(pSubRunAux_->subRunID()),
                      fp_.subRunEntryNumber());
  fp_.update_subRun(status_);
}
719 
// Write the Run auxiliary and products, stamping the auxiliary with
// the previously stored range-set ID (runRSID_), and update the file
// index and file properties.  Mirrors writeSubRun.
void RootDAQOutFile::writeRun(RunPrincipal const& r)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  pRunAux_ = &r.runAux();
  pRunAux_->setRangeSetID(runRSID_);
  fillBranches<InRun>(r, pRunProductProvenanceVector_);
  fileIndex_.addEntry(EventID::invalidEvent(pRunAux_->runID()),
                      fp_.runEntryNumber());
  fp_.update_run(status_);
}
730 
// Dump the in-memory ParentageRegistry into the parentage tree: one
// entry per (ParentageID, Parentage) pair, written through two
// branches whose addresses are re-pointed for each registry entry.
void RootDAQOutFile::writeParentageRegistry()
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  auto pid = root::getObjectRequireDict<ParentageID>();
  ParentageID const* hash = &pid;
  if (parentageTree_->Branch(
        rootNames::parentageIDBranchName().c_str(), &hash, basketSize_, 0) == nullptr)
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to create a branch for ParentageIDs in the output file";
  }
  hash = nullptr;
  auto par = root::getObjectRequireDict<Parentage>();
  Parentage const* desc = &par;
  if (parentageTree_->Branch(
        rootNames::parentageBranchName().c_str(), &desc, basketSize_, 0) == nullptr)
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to create a branch for Parentages in the output file";
  }
  desc = nullptr;
  // Fill one tree entry per registry element; the branches read the
  // current targets of 'hash' and 'desc' at each Fill().
  for (auto const& pr : ParentageRegistry::get())
  {
    hash = &pr.first;
    desc = &pr.second;
    parentageTree_->Fill();
  }
  // Detach the branch addresses from the (about to be invalid) locals.
  parentageTree_->SetBranchAddress(rootNames::parentageIDBranchName().c_str(),
                                   nullptr);
  parentageTree_->SetBranchAddress(rootNames::parentageBranchName().c_str(),
                                   nullptr);
}
763 
764 void RootDAQOutFile::writeFileFormatVersion()
765 {
766  RecursiveMutexSentry sentry{mutex_, __func__};
767  FileFormatVersion const ver{getFileFormatVersion(), getFileFormatEra()};
768  auto const* pver = &ver;
769  TBranch* b = metaDataTree_->Branch(
770  metaBranchRootName<FileFormatVersion>(), &pver, basketSize_, 0);
771  // FIXME: Turn this into a throw!
772  assert(b);
773  b->Fill();
774 }
775 
776 void RootDAQOutFile::writeFileIndex()
777 {
778  RecursiveMutexSentry sentry{mutex_, __func__};
779  fileIndex_.sortBy_Run_SubRun_Event();
780  FileIndex::Element elem{};
781  auto const* findexElemPtr = &elem;
782  TBranch* b = fileIndexTree_->Branch(
783  metaBranchRootName<FileIndex::Element>(), &findexElemPtr, basketSize_, 0);
784  // FIXME: Turn this into a throw!
785  assert(b);
786  for (auto& entry : fileIndex_)
787  {
788  findexElemPtr = &entry;
789  b->Fill();
790  }
791  b->SetAddress(nullptr);
792 }
793 
// Flush the event-history tree to the output file.
void RootDAQOutFile::writeEventHistory()
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  RootOutputTree::writeTTree(eventHistoryTree_);
}
799 
// Intentionally a no-op (see comment below).
void RootDAQOutFile::writeProcessConfigurationRegistry()
{
  // We don't do this yet; currently we're storing a slightly
  // bloated ProcessHistoryRegistry.
}
805 
806 void RootDAQOutFile::writeProcessHistoryRegistry()
807 {
808  RecursiveMutexSentry sentry{mutex_, __func__};
809  ProcessHistoryMap pHistMap;
810  for (auto const& pr : ProcessHistoryRegistry::get())
811  {
812  pHistMap.emplace(pr);
813  }
814  auto const* p = &pHistMap;
815  TBranch* b = metaDataTree_->Branch(
816  metaBranchRootName<ProcessHistoryMap>(), &p, basketSize_, 0);
817  if (b == nullptr)
818  {
819  throw Exception(errors::LogicError) // NOLINT(cert-err60-cpp)
820  << "Unable to locate required "
821  "ProcessHistoryMap branch in output "
822  "metadata tree.\n";
823  }
824  b->Fill();
825 }
826 
// Write the FileCatalog_metadata sqlite table: caller-supplied
// metadata 'md', then values derived from 'stats' (times, run list,
// event counts, parents), then stream-specific overrides 'ssmd'.
// All inserts happen inside one transaction.
void RootDAQOutFile::writeFileCatalogMetadata(
  FileStatsCollector const& stats,
  FileCatalogMetadata::collection_type const& md,
  FileCatalogMetadata::collection_type const& ssmd)
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  using namespace cet::sqlite;
  Ntuple<string, string> fileCatalogMetadata{
    *rootFileDB_, "FileCatalog_metadata", {{"Name", "Value"}}, true};
  Transaction txn{*rootFileDB_};
  for (auto const& kv : md)
  {
    fileCatalogMetadata.insert(kv.first, kv.second);
  }

  // Add our own specific information: File format and friends.
  fileCatalogMetadata.insert("file_format", "\"artroot\"");

  // File start time.
  namespace bpt = boost::posix_time;
  auto formatted_time = [](auto const& t) {
    return cet::canonical_string(bpt::to_iso_extended_string(t));
  };
  fileCatalogMetadata.insert("start_time",
                             formatted_time(stats.outputFileOpenTime()));
  // File "end" time: now, since file is not actually closed yet.
  fileCatalogMetadata.insert(
    "end_time",
    formatted_time(boost::posix_time::second_clock::universal_time()));
  // Run/subRun information: emit a "runs" entry only when an
  // "art.run_type" value is present in the incoming metadata.
  if (!stats.seenSubRuns().empty())
  {
    auto I = find_if(md.crbegin(), md.crend(), [](auto const& p) {
      return p.first == "art.run_type";
    });
    if (I != md.crend())
    {
      ostringstream buf;
      buf << "[ ";
      for (auto const& srid : stats.seenSubRuns())
      {
        buf << "[ " << srid.run() << ", " << srid.subRun() << ", "
            << cet::canonical_string(I->second) << " ], ";
      }
      // Rewind over last delimiter.
      buf.seekp(-2, ios_base::cur);
      buf << " ]";
      fileCatalogMetadata.insert("runs", buf.str());
    }
  }
  // Number of events.
  fileCatalogMetadata.insert("event_count",
                             std::to_string(stats.eventsThisFile()));
  fileCatalogMetadata.insert("first_event",
                             std::to_string(stats.lowestEventID().event()));
  fileCatalogMetadata.insert("last_event",
                             std::to_string(stats.highestEventID().event()));
  // File parents.
  if (!stats.parents().empty())
  {
    ostringstream pstring;
    pstring << "[ ";
    for (auto const& parent : stats.parents())
    {
      pstring << cet::canonical_string(parent) << ", ";
    }
    // Rewind over last delimiter.
    pstring.seekp(-2, ios_base::cur);
    pstring << " ]";
    fileCatalogMetadata.insert("parents", pstring.str());
  }

  // The following need to be encapsulated in an art table
  // first_event and last_event.
  auto eidToTuple = [](EventID const& eid) -> string {
    ostringstream eidStr;
    eidStr << "[ " << eid.run() << ", " << eid.subRun() << ", " << eid.event()
           << " ]";
    return eidStr.str();
  };
  fileCatalogMetadata.insert("art.first_event",
                             eidToTuple(stats.lowestEventID()));
  fileCatalogMetadata.insert("art.last_event",
                             eidToTuple(stats.highestEventID()));
  fileCatalogMetadata.insert("art.file_format_era",
                             cet::canonical_string(getFileFormatEra()));
  fileCatalogMetadata.insert("art.file_format_version",
                             std::to_string(getFileFormatVersion()));

  // Incoming stream-specific metadata overrides.
  for (auto const& kv : ssmd)
  {
    fileCatalogMetadata.insert(kv.first, kv.second);
  }
  txn.commit();
}
923 
// Export the in-memory ParameterSetRegistry into the embedded sqlite
// database of this output file.
void RootDAQOutFile::writeParameterSetRegistry()
{
  RecursiveMutexSentry sentry{mutex_, __func__};
  fhicl::ParameterSetRegistry::exportTo(*rootFileDB_);
}
929 
930 void RootDAQOutFile::writeProductDescriptionRegistry()
931 {
932  RecursiveMutexSentry sentry{mutex_, __func__};
933  // Make a local copy of the UpdateOutputCallbacks's ProductList,
934  // removing any transient or pruned products.
935  ProductRegistry reg;
936  auto productDescriptionsToWrite = [this, &reg](BranchType const bt) {
937  for (auto const& pr : descriptionsToPersist_[bt]) // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index)
938  {
939  auto const& desc = pr.second;
940  reg.productList_.emplace(BranchKey{desc}, desc);
941  }
942  };
943  for_each_branch_type(productDescriptionsToWrite);
944  ProductRegistry const* regp = &reg;
945  TBranch* b = metaDataTree_->Branch(
946  metaBranchRootName<ProductRegistry>(), &regp, basketSize_, 0);
947  // FIXME: Turn this into a throw!
948  assert(b);
949  b->Fill();
950 }
951 
952 void RootDAQOutFile::writeProductDependencies()
953 {
954  RecursiveMutexSentry sentry{mutex_, __func__};
955  BranchChildren const* ppDeps = &om_->branchChildren();
956  TBranch* b = metaDataTree_->Branch(
957  metaBranchRootName<BranchChildren>(), &ppDeps, basketSize_, 0);
958  // FIXME: Turn this into a throw!
959  assert(b);
960  b->Fill();
961 }
962 
void RootDAQOutFile::writeResults(ResultsPrincipal& resp)
{
  // Write the Results-tier products and auxiliary for this principal.
  RecursiveMutexSentry sentry{mutex_, __func__};
  // Point the auxiliary pointer at this principal's ResultsAuxiliary
  // BEFORE filling: presumably pResultsAux_ is a registered branch
  // address consumed by the fill below — keep this ordering.
  pResultsAux_ = &resp.resultsAux();
  fillBranches<InResults>(resp, pResultsProductProvenanceVector_);
}
969 
void RootDAQOutFile::writeTTrees()
{
  // Flush all trees to the output file: the three bookkeeping trees
  // (metadata, file index, parentage) followed by one data tree per
  // branch type. TRACE logging brackets each step so slow/hung writes
  // can be localized from the log.
  TLOG(TLVL_TRACE) << "Start of RootDAQOutFile::writeTTrees";
  RecursiveMutexSentry sentry{mutex_, __func__};
  RootOutputTree::writeTTree(metaDataTree_);
  TLOG(TLVL_TRACE) << "RootDAQOutFile::writeTTrees after writing metaDataTree_";
  RootOutputTree::writeTTree(fileIndexTree_);
  TLOG(TLVL_TRACE) << "RootDAQOutFile::writeTTrees after writing fileIndexTree_";
  RootOutputTree::writeTTree(parentageTree_);
  TLOG(TLVL_TRACE) << "RootDAQOutFile::writeTTrees after writing parentageTree_";
  // Write the per-branch-type data trees (event, subrun, run, results).
  for_each_branch_type(
    [this](BranchType const bt) { treePointers_[bt]->writeTree(); });
  TLOG(TLVL_TRACE) << "End of RootDAQOutFile::writeTTrees";
}
984 
985 void RootDAQOutFile::setSubRunAuxiliaryRangeSetID(RangeSet const& ranges)
986 {
987  RecursiveMutexSentry sentry{mutex_, __func__};
988  subRunRSID_ = getNewRangeSetID(*rootFileDB_, InSubRun, ranges.run());
989  insertIntoEventRanges(*rootFileDB_, ranges);
990  auto const& eventRangesIDs = getExistingRangeSetIDs(*rootFileDB_, ranges);
991  insertIntoJoinTable(*rootFileDB_, InSubRun, subRunRSID_, eventRangesIDs);
992 }
993 
994 void RootDAQOutFile::setRunAuxiliaryRangeSetID(RangeSet const& ranges)
995 {
996  RecursiveMutexSentry sentry{mutex_, __func__};
997  runRSID_ = getNewRangeSetID(*rootFileDB_, InRun, ranges.run());
998  insertIntoEventRanges(*rootFileDB_, ranges);
999  auto const& eventRangesIDs = getExistingRangeSetIDs(*rootFileDB_, ranges);
1000  insertIntoJoinTable(*rootFileDB_, InRun, runRSID_, eventRangesIDs);
1001 }
1002 
1003 template<BranchType BT>
1004 EDProduct const*
1005 RootDAQOutFile::getProduct(OutputHandle const& oh,
1006  RangeSet const& prunedProductRS,
1007  string const& wrappedName)
1008 {
1009  RecursiveMutexSentry sentry{mutex_, __func__};
1010  if constexpr (detail::range_sets_supported(BT))
1011  {
1012  {
1013  }
1014  if (!prunedProductRS.is_valid())
1015  {
1016  return dummyProductCache_.product(wrappedName);
1017  }
1018  }
1019  return oh.isValid() ? oh.wrapper() : dummyProductCache_.product(wrappedName);
1020 }
1021 
// Fill the output tree for branch type BT from the given principal:
// resolve each selected product, accumulate the provenance records to
// keep (walking parent provenance as permitted by the drop-metadata
// policy), record product range-set IDs in the file DB, and finally
// fill the tree. vpp receives the kept provenance for the duration of
// the fill and is cleared afterwards.
template<BranchType BT>
void RootDAQOutFile::fillBranches(Principal const& principal,
                                  vector<ProductProvenance>* vpp)
{
  TLOG(TLVL_TRACE) << "Start of RootDAQOutFile::fillBranches";
  RecursiveMutexSentry sentry{mutex_, __func__};
  // Fast cloning only ever applies to the event tree.
  bool const fastCloning = ((BT == InEvent) && wasFastCloned_);
  map<unsigned, unsigned> checksumToIndex;
  auto const& principalRS = principal.seenRanges();
  // Provenance entries to be written out; a set so duplicate inserts
  // (e.g. a parent reached through two children) are collapsed.
  set<ProductProvenance> keptprv;
  for (auto const& val : selectedOutputItemList_[BT])
  {
    auto const& bd = val.branchDescription_;
    auto const pid = bd.productID();
    descriptionsToPersist_[BT].emplace(pid, bd);
    bool const produced = bd.produced();
    // Resolve unless the branch was fast-cloned from the input (cloned
    // branches already carry their data).
    bool const resolveProd = (produced || !fastCloning ||
                              treePointers_[BT]->uncloned(bd.branchName()));
    // Update the kept provenance
    bool const keepProvenance =
      ((dropMetaData_ == DropMetaData::DropNone) ||
       (produced && (dropMetaData_ == DropMetaData::DropPrior)));
    auto const& oh = principal.getForOutput(pid, resolveProd);
    auto prov = keptprv.begin();
    if (keepProvenance)
    {
      if (oh.productProvenance())
      {
        prov = keptprv.insert(*oh.productProvenance()).first;
        if ((dropMetaData_ != DropMetaData::DropAll) &&
            !dropMetaDataForDroppedData_)
        {
          {
            // Iterative depth-first walk over the parentage graph,
            // using an explicit stack instead of recursion.
            vector<ProductProvenance const*> stacked_pp;
            stacked_pp.push_back(&*oh.productProvenance());
            while (true)
            {
              if (stacked_pp.empty())
              {
                break;
              }
              auto current_pp = stacked_pp.back();
              stacked_pp.pop_back();
              for (auto const parent_bid :
                   current_pp->parentage().parents())
              {
                // Note: Suppose the parent ProductID corresponds to
                // product that has been requested to be
                // "dropped"--i.e. someone has specified "drop
                // *_m1a_*_*" in their configuration, and
                // although a given product matching this
                // pattern will not be included in the
                // selectedProducts_ list, one of the parents of
                // a selected product can match the "dropping"
                // pattern and its BranchDescription will still
                // be written to disk since it is inserted into
                // the descriptionsToPersist_ data member.
                auto parent_bd = principal.getProductDescription(parent_bid);
                if (!parent_bd)
                {
                  // FIXME: Is this an error condition?
                  continue;
                }
                descriptionsToPersist_[BT].emplace(parent_bid, *parent_bd);
                if (!parent_bd->produced())
                {
                  // We got it from the input, nothing to do.
                  continue;
                }
                auto parent_pp =
                  principal.branchToProductProvenance(parent_bid);
                if (!parent_pp || (dropMetaData_ != DropMetaData::DropNone))
                {
                  continue;
                }
                if (!keptprv.insert(*parent_pp).second)
                {
                  // Already there, done.
                  continue;
                }
                if ((dropMetaData_ != DropMetaData::DropAll) &&
                    !dropMetaDataForDroppedData_)
                {
                  // Newly kept parent: descend into its own parents too.
                  stacked_pp.push_back(parent_pp.get());
                }
              }
            }
          }
        }
      }
      else
      {
        // No provenance: product was either not produced, or was
        // dropped; create provenance to remember that.
        auto status = productstatus::dropped();
        if (produced)
        {
          status = productstatus::neverCreated();
        }
        prov = keptprv.emplace(pid, status).first;
      }
    }
    // Resolve the product if we are going to attempt to write it out.
    if (resolveProd)
    {
      // Product was either produced, or we are not cloning the whole
      // file and the product branch was not cloned so we should be
      // able to get a pointer to it from the passed principal and
      // write it out.
      auto const& rs = getRangeSet<BT>(oh, principalRS, produced);
      if (detail::range_sets_supported(BT) && !rs.is_valid())
      {
        // At this point we are now going to write out a dummy product
        // whose Wrapper present flag is false because the range set
        // got invalidated to present double counting when combining
        // run or subrun products from multiple fragments. We change
        // the provenance status that we are going to write out to
        // dummyToPreventDoubleCount to flag this case. Note that the
        // requirement is only that the status not be
        // productstatus::present(). We use a special code to make it
        // easier for humans to tell what is going on.
        // NOTE(review): if keepProvenance was false, prov is still
        // keptprv.begin() here — presumably range-set invalidation
        // cannot occur in that configuration; confirm.
        auto prov_bid = prov->productID();
        if (keptprv.erase(*prov) != 1ull)
        {
          throw Exception(errors::LogicError, "KeptProvenance::setStatus") // NOLINT(cert-err60-cpp)
            << "Attempt to set product status for product whose provenance "
               "is not being recorded.\n";
        }
        // set elements are immutable: replace the entry to change status.
        prov =
          keptprv
            .emplace(prov_bid, productstatus::dummyToPreventDoubleCount())
            .first;
      }
      auto const* product = getProduct<BT>(oh, rs, bd.wrappedName());
      setProductRangeSetID<BT>(
        rs, *rootFileDB_, const_cast<EDProduct*>(product), checksumToIndex); // NOLINT(cppcoreguidelines-pro-type-const-cast)
      val.product_ = product;
    }
  }
  // Hand the accumulated provenance to the caller-owned vector; it is
  // read during fillTree() and cleared immediately afterwards.
  vpp->assign(keptprv.begin(), keptprv.end());
  for (auto const& val : *vpp)
  {
    if (val.productStatus() == productstatus::uninitialized())
    {
      throw Exception(errors::LogicError, // NOLINT(cert-err60-cpp)
                      "RootDAQOutFile::fillBranches(principal, vpp):")
        << "Attempt to write a product with uninitialized provenance!\n";
    }
  }

  TLOG(TLVL_TRACE) << "RootDAQOutFile::fillBranches before fillTree call";
  treePointers_[BT]->fillTree();
  TLOG(TLVL_TRACE) << "RootDAQOutFile::fillBranches after fillTree call";
  vpp->clear();
  TLOG(TLVL_TRACE) << "End of RootDAQOutFile::fillBranches";
}
1178 
1179 } // namespace art