// artdaq 3.12.06
// RootDAQOutFile.cc
1 #include "artdaq/ArtModules/RootDAQOutput-s124/RootDAQOutFile.h"
2 #include "TRACE/tracemf.h"
3 // vim: set sw=2 expandtab :
4 
5 #include "artdaq/DAQdata/Globals.hh"
6 
7 #include "art/Framework/Core/OutputFileGranularity.h"
8 #include "art/Framework/Core/OutputModule.h"
9 #include "art/Framework/IO/ClosingCriteria.h"
10 #include "art/Framework/IO/FileStatsCollector.h"
11 #include "art/Framework/Principal/EventPrincipal.h"
12 #include "art/Framework/Principal/RangeSetsSupported.h"
13 #include "art/Framework/Principal/ResultsPrincipal.h"
14 #include "art/Framework/Principal/RunPrincipal.h"
15 #include "art/Framework/Principal/SubRunPrincipal.h"
16 #include "art/Framework/Services/Registry/ServiceHandle.h"
17 #include "art/Framework/Services/System/DatabaseConnection.h"
18 #include "art/Persistency/Provenance/ProcessHistoryRegistry.h"
19 #include "art_root_io/DropMetaData.h"
20 #include "art_root_io/GetFileFormatEra.h"
21 #include "art_root_io/GetFileFormatVersion.h"
22 #include "art_root_io/RootDB/TKeyVFSOpenPolicy.h"
23 #include "art_root_io/RootFileBlock.h"
24 #include "art_root_io/checkDictionaries.h"
25 #include "art_root_io/detail/getObjectRequireDict.h"
26 #include "boost/date_time/posix_time/posix_time.hpp"
27 #include "canvas/Persistency/Provenance/BranchChildren.h"
28 #include "canvas/Persistency/Provenance/BranchType.h"
29 #include "canvas/Persistency/Provenance/EventAuxiliary.h"
30 #include "canvas/Persistency/Provenance/EventID.h"
31 #include "canvas/Persistency/Provenance/FileFormatVersion.h"
32 #include "canvas/Persistency/Provenance/Parentage.h"
33 #include "canvas/Persistency/Provenance/ParentageRegistry.h"
34 #include "canvas/Persistency/Provenance/ProductStatus.h"
35 #include "canvas/Persistency/Provenance/ResultsAuxiliary.h"
36 #include "canvas/Persistency/Provenance/RunAuxiliary.h"
37 #include "canvas/Persistency/Provenance/SubRunAuxiliary.h"
38 #include "canvas/Persistency/Provenance/rootNames.h"
39 #include "canvas/Utilities/Exception.h"
40 #include "canvas_root_io/Utilities/DictionaryChecker.h"
41 #include "cetlib/canonical_string.h"
42 #include "cetlib/container_algorithms.h"
43 #include "cetlib/exempt_ptr.h"
44 #include "cetlib/sqlite/Ntuple.h"
45 #include "cetlib/sqlite/Transaction.h"
46 #include "cetlib/sqlite/create_table.h"
47 #include "cetlib/sqlite/exec.h"
48 #include "cetlib/sqlite/insert.h"
49 #include "fhiclcpp/ParameterSetRegistry.h"
50 #include "messagefacility/MessageLogger/MessageLogger.h"
51 #include "range/v3/view.hpp"
52 
53 #include "TBranch.h"
54 
55 #define TRACE_NAME (app_name + "_RootDAQOutFile").c_str()
56 
57 #include "TFile.h"
58 #include "TTree.h"
59 
60 #include <fcntl.h> // posix_fadvise POSIX_FADV_DONTNEED
61 #include <sys/sysinfo.h> // sysinfo(sysinfo*)
62 #include <algorithm>
63 #include <utility>
64 #include <vector>
65 
66 using namespace cet;
67 using namespace hep::concurrency;
68 
69 using art::BranchType;
71 using art::rootNames::metaBranchRootName;
72 
73 using std::map;
74 using std::string;
75 using std::vector;
76 
77 namespace {
78 
79 void create_table(sqlite3* const db,
80  string const& name,
81  vector<string> const& columns,
82  string const& suffix = {})
83 {
84  if (columns.empty())
85  {
86  throw art::Exception(art::errors::LogicError) // NOLINT(cert-err60-cpp)
87  << "Number of sqlite columns specified for table: " << name << '\n'
88  << "is zero.\n";
89  }
90  string ddl = "DROP TABLE IF EXISTS " + name +
91  "; "
92  "CREATE TABLE " +
93  name + "(" + columns.front();
94  for_each(columns.begin() + 1, columns.end(), [&ddl](auto const& col) {
95  ddl += "," + col;
96  });
97  ddl += ") ";
98  ddl += suffix;
99  ddl += ";";
100  sqlite::exec(db, ddl);
101 }
102 
103 void insert_eventRanges_row(sqlite3_stmt* stmt,
104  art::SubRunNumber_t const sr,
105  art::EventNumber_t const b,
106  art::EventNumber_t const e)
107 {
108  sqlite3_bind_int64(stmt, 1, sr);
109  sqlite3_bind_int64(stmt, 2, b);
110  sqlite3_bind_int64(stmt, 3, e);
111  sqlite3_step(stmt);
112  sqlite3_reset(stmt);
113 }
114 
115 void insert_rangeSets_eventSets_row(sqlite3_stmt* stmt,
116  unsigned const rsid,
117  unsigned const esid)
118 {
119  sqlite3_bind_int64(stmt, 1, rsid);
120  sqlite3_bind_int64(stmt, 2, esid);
121  sqlite3_step(stmt);
122  sqlite3_reset(stmt);
123 }
124 
125 unsigned
126 getNewRangeSetID(sqlite3* db,
127  art::BranchType const bt,
128  art::RunNumber_t const r)
129 {
130  sqlite::insert_into(db, art::BranchTypeToString(bt) + "RangeSets")
131  .values(r);
132  return sqlite3_last_insert_rowid(db);
133 }
134 
135 vector<unsigned>
136 getExistingRangeSetIDs(sqlite3* db, art::RangeSet const& rs)
137 {
138  using namespace std;
139  vector<unsigned> rangeSetIDs;
140  cet::transform_all(rs, back_inserter(rangeSetIDs), [db](auto const& range) {
141  sqlite::query_result<unsigned> r;
142  r << sqlite::select("ROWID")
143  .from(db, "EventRanges")
144  .where("SubRun=" + to_string(range.subRun()) +
145  " AND "
146  "begin=" +
147  to_string(range.begin()) +
148  " AND "
149  "end=" +
150  to_string(range.end()));
151  return unique_value(r);
152  });
153  return rangeSetIDs;
154 }
155 
156 void insertIntoEventRanges(sqlite3* db, art::RangeSet const& rs)
157 {
158  sqlite::Transaction txn{db};
159  sqlite3_stmt* stmt{nullptr};
160  string const ddl{
161  "INSERT INTO EventRanges(SubRun, begin, end) "
162  "VALUES(?, ?, ?);"};
163  sqlite3_prepare_v2(db, ddl.c_str(), -1, &stmt, nullptr);
164  for (auto const& range : rs)
165  {
166  insert_eventRanges_row(stmt, range.subRun(), range.begin(), range.end());
167  }
168  sqlite3_finalize(stmt);
169  txn.commit();
170 }
171 
172 void insertIntoJoinTable(sqlite3* db,
173  art::BranchType const bt,
174  unsigned const rsID,
175  vector<unsigned> const& eventRangesIDs)
176 {
177  sqlite::Transaction txn{db};
178  sqlite3_stmt* stmt{nullptr};
179  string const ddl{
180  "INSERT INTO " + art::BranchTypeToString(bt) +
181  "RangeSets_EventRanges(RangeSetsID, EventRangesID) Values(?,?);"};
182  sqlite3_prepare_v2(db, ddl.c_str(), -1, &stmt, nullptr);
183  cet::for_all(eventRangesIDs, [stmt, rsID](auto const eventRangeID) {
184  insert_rangeSets_eventSets_row(stmt, rsID, eventRangeID);
185  });
186  sqlite3_finalize(stmt);
187  txn.commit();
188 }
189 
190 void maybeInvalidateRangeSet(BranchType const bt,
191  art::RangeSet const& principalRS,
192  art::RangeSet& productRS)
193 {
194  assert(principalRS.is_sorted());
195  assert(productRS.is_sorted());
196  if (!productRS.is_valid())
197  {
198  return;
199  }
200  if (bt == art::InRun && productRS.is_full_run())
201  {
202  return;
203  }
204  if (bt == art::InSubRun && productRS.is_full_subRun())
205  {
206  return;
207  }
208  if (productRS.ranges().empty())
209  {
210  return;
211  }
212  auto const r = productRS.run();
213  auto const& productFront = productRS.ranges().front();
214  if (!principalRS.contains(r, productFront.subRun(), productFront.begin()))
215  {
216  productRS = art::RangeSet::invalid();
217  }
218 }
219 
220 // The purpose of 'maybeInvalidateRangeSet' is to support the
221 // following situation. Suppose process 1 creates three files with
222 // one Run product each, all corresponding to the same Run. Let's
223 // call the individual Run product instances in the three separate
224 // files as A, B, and C. Now suppose that the three files serve as
225 // inputs to process 2, where a concatenation is being performed AND
226 // ALSO an output file switch. Process 2 results in two output
227 // files, and now, in process 3, we concatenate the outputs from
228 // process 2. The situation would look like this:
229 //
230 // Process 1: [A] [B] [C]
231 // \ / \ /
232 // Process 2: [A + B] [B + C]
233 // \ / \ /
234 // D=agg(A,B) | | E=agg(B,C)
235 // \ /
236 // Process 3: [D + E]
237 //
238 // Notice the complication in process 3: product 'B' will be
239 // aggregated twice: once with A, and once with C. Whenever the
240 // output from process 3 is read as input to another process, the
241 // fetched product will be equivalent to A+2B+C.
242 //
243 // To avoid this situation, we compare the RangeSet of the product
244 // with the RangeSet of the in-memory RunAuxiliary. If the
245 // beginning of B's RangeSet is not contained within the auxiliary's
246 // RangeSet, then a dummy product with an invalid RangeSet is
247 // written to disk. Instead of the diagram above, we have:
248 //
249 // Process 1: [A] [B] [C]
250 // \ / \ /
251 // Process 2: [A + B] [x + C]
252 // \ / \ /
253 // D=agg(A,B) | | E=agg(x,C)=C
254 // \ /
255 // Process 3: [D + E]
256 //
257 // where 'x' represent a dummy product. Upon aggregating D and E,
258 // we obtain the correctly formed A+B+C product.
259 template<BranchType BT>
260 art::RangeSet
261 getRangeSet(art::OutputHandle const& oh,
262  art::RangeSet const& principalRS,
263  bool const producedInThisProcess)
264 {
265  if constexpr (!art::detail::range_sets_supported(BT))
266  {
267  return art::RangeSet::invalid();
268  }
269 
270  auto rs = oh.isValid() ? oh.rangeOfValidity() : art::RangeSet::invalid();
271  // Because a user can specify (e.g.):
272  // r.put(std::move(myProd), art::runFragment(myRangeSet));
273  // products that are produced in this process can have valid, yet
274  // arbitrary RangeSets. We therefore never invalidate a RangeSet
275  // that corresponds to a product produced in this process.
276  //
277  // It is possible for a user to specify a RangeSet which does not
278  // correspond AT ALL to the in-memory auxiliary RangeSet. In that
279  // case, users should not expect to be able to retrieve products
280  // for which no corresponding events or sub-runs were processed.
281  if (!producedInThisProcess)
282  {
283  maybeInvalidateRangeSet(BT, principalRS, rs);
284  }
285  return rs;
286 }
287 
288 template<BranchType BT>
289 void setProductRangeSetID(art::RangeSet const& rs,
290  sqlite3* db,
291  art::EDProduct* product,
292  map<unsigned, unsigned>& checksumToIndexLookup)
293 {
294  if constexpr (!art::detail::range_sets_supported(BT))
295  {
296  return;
297  }
298 
299  if (!rs.is_valid())
300  { // Invalid range-sets not written to DB
301  return;
302  }
303  // Set range sets for SubRun and Run products
304  auto it = checksumToIndexLookup.find(rs.checksum());
305  if (it != checksumToIndexLookup.cend())
306  {
307  product->setRangeSetID(it->second);
308  }
309  else
310  {
311  unsigned const rsID = getNewRangeSetID(db, BT, rs.run());
312  product->setRangeSetID(rsID);
313  checksumToIndexLookup.emplace(rs.checksum(), rsID);
314  insertIntoEventRanges(db, rs);
315  auto const& eventRangesIDs = getExistingRangeSetIDs(db, rs);
316  insertIntoJoinTable(db, BT, rsID, eventRangesIDs);
317  }
318 }
319 
320 } // unnamed namespace
321 
322 namespace art {
323 
// Defaulted destructor, defined out of line in this translation unit.
OutputItem::~OutputItem() = default;
325 
// Construct an output item for branch description 'bd'.  The product
// pointer starts null; it serves as the branch address slot handed to
// RootOutputTree::addOutputBranch (see selectProducts).
OutputItem::OutputItem(BranchDescription const& bd)
    : branchDescription{bd}, product{nullptr}
{}
329 
// Construct the output file: opens the ROOT file "recreate", creates the
// metadata, file-index and parentage trees plus the four per-branch-type
// data trees, opens the in-file SQLite database, verifies that
// dictionaries exist for all auxiliary types, and creates the range-set
// bookkeeping tables.
//
// The parameters mirror the output module's configuration: ROOT
// compression level, free-memory thresholds (freePercent/freeMB — used by
// the destructor to decide whether to drop page-cache pages), tree tuning
// knobs (split level, basket size, max virtual size, memory-save
// threshold), and the provenance-dropping policy.
RootDAQOutFile::RootDAQOutFile(OutputModule* om,
                               string const& fileName,
                               ClosingCriteria const& fileSwitchCriteria,
                               int const compressionLevel,
                               unsigned freePercent,
                               unsigned freeMB,
                               int64_t const saveMemoryObjectThreshold,
                               int64_t const treeMaxVirtualSize,
                               int const splitLevel,
                               int const basketSize,
                               DropMetaData dropMetaData,
                               bool const dropMetaDataForDroppedData)
  : om_{om}
  , file_{fileName}
  , fileSwitchCriteria_{fileSwitchCriteria}
  , compressionLevel_{compressionLevel}
  , freePercent_{freePercent}
  , freeMB_{freeMB}
  , saveMemoryObjectThreshold_{saveMemoryObjectThreshold}
  , treeMaxVirtualSize_{treeMaxVirtualSize}
  , splitLevel_{splitLevel}
  , basketSize_{basketSize}
  , dropMetaData_{dropMetaData}
  , dropMetaDataForDroppedData_{dropMetaDataForDroppedData}
  , filePtr_{TFile::Open(file_.c_str(), "recreate", "", compressionLevel)}
{
  using std::make_unique;
  // Don't split metadata tree or event description tree
  metaDataTree_ = RootOutputTree::makeTTree(
    filePtr_.get(), rootNames::metaDataTreeName(), 0);
  fileIndexTree_ = RootOutputTree::makeTTree(
    filePtr_.get(), rootNames::fileIndexTreeName(), 0);
  parentageTree_ = RootOutputTree::makeTTree(
    filePtr_.get(), rootNames::parentageTreeName(), 0);
  // One RootOutputTree per branch type, indexed 0..3 as
  // InEvent, InSubRun, InRun, InResults.
  treePointers_[0] =
    make_unique<RootOutputTree>(filePtr_.get(),
                                InEvent,
                                pEventAux_,
                                pEventProductProvenanceVector_,
                                basketSize,
                                splitLevel,
                                treeMaxVirtualSize,
                                saveMemoryObjectThreshold);
  treePointers_[1] =
    make_unique<RootOutputTree>(filePtr_.get(),
                                InSubRun,
                                pSubRunAux_,
                                pSubRunProductProvenanceVector_,
                                basketSize,
                                splitLevel,
                                treeMaxVirtualSize,
                                saveMemoryObjectThreshold);
  treePointers_[2] = make_unique<RootOutputTree>(filePtr_.get(),
                                                 InRun,
                                                 pRunAux_,
                                                 pRunProductProvenanceVector_,
                                                 basketSize,
                                                 splitLevel,
                                                 treeMaxVirtualSize,
                                                 saveMemoryObjectThreshold);
  treePointers_[3] =
    make_unique<RootOutputTree>(filePtr_.get(),
                                InResults,
                                pResultsAux_,
                                pResultsProductProvenanceVector_,
                                basketSize,
                                splitLevel,
                                treeMaxVirtualSize,
                                saveMemoryObjectThreshold);
  // Open the SQLite database that lives inside the ROOT file
  // (TKeyVFS-backed), creating it if necessary.
  rootFileDB_.reset(ServiceHandle<DatabaseConnection>{}->get<TKeyVFSOpenPolicy>(
    "RootFileDB",
    filePtr_.get(),
    SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE));
  // Timestamp used by requestsToCloseFile() to compute file age.
  beginTime_ = std::chrono::steady_clock::now();
  // Check that dictionaries for the auxiliaries exist
  root::DictionaryChecker checker;
  checker.checkDictionaries<EventAuxiliary>();
  checker.checkDictionaries<SubRunAuxiliary>();
  checker.checkDictionaries<RunAuxiliary>();
  checker.checkDictionaries<ResultsAuxiliary>();
  checker.reportMissingDictionaries();

  createDatabaseTables();
  TLOG(TLVL_DEBUG + 32) << "RootDAQOutFile ctor complete";
}
415 
416 art::RootDAQOutFile::~RootDAQOutFile()
417 {
418  struct sysinfo info;
419  int sts = sysinfo(&info);
420  auto free_percent = static_cast<unsigned>(info.freeram * 100 / info.totalram);
421  auto free_MB = static_cast<unsigned>(info.freeram * info.mem_unit >> 20); // round down (1024.9 => 1024 MB)
422  TRACE(TLVL_DEBUG + 32, "~RootDAQOutFile free %%%u %.1fMB (%u) buffers=%fGB mem_unit=%u", // NOLINT
423  free_percent, static_cast<float>(info.freeram * info.mem_unit / (1024 * 1024.0)),
424  free_MB, static_cast<float>(info.bufferram * info.mem_unit / (1024 * 1024 * 1024.0)), info.mem_unit);
425  if (free_percent < freePercent_ || free_MB < freeMB_)
426  {
427  TLOG(TLVL_DEBUG + 32) << "RootDAQOutFile Flush/DONTNEED";
428  filePtr_->Flush();
429  sts = posix_fadvise(filePtr_->GetFd(), 0, 0 /*len,0=all*/, POSIX_FADV_DONTNEED);
430  }
431  TLOG(TLVL_DEBUG + 32) << "~RootDAQOutFile complete sts=" << sts;
432 }
433 
434 void art::RootDAQOutFile::createDatabaseTables()
435 {
436  // Event ranges
437  create_table(*rootFileDB_,
438  "EventRanges",
439  {"SubRun INTEGER",
440  "begin INTEGER",
441  "end INTEGER",
442  "UNIQUE (SubRun,begin,end) ON CONFLICT IGNORE"});
443  // SubRun range sets
444  using namespace cet::sqlite;
445  create_table(*rootFileDB_, "SubRunRangeSets", column<int>{"Run"});
446  create_table(*rootFileDB_,
447  "SubRunRangeSets_EventRanges",
448  {"RangeSetsID INTEGER",
449  "EventRangesID INTEGER",
450  "PRIMARY KEY(RangeSetsID,EventRangesID)"},
451  "WITHOUT ROWID");
452  // Run range sets
453  create_table(*rootFileDB_, "RunRangeSets", column<int>{"Run"});
454  create_table(*rootFileDB_,
455  "RunRangeSets_EventRanges",
456  {"RangeSetsID INTEGER",
457  "EventRangesID INTEGER",
458  "PRIMARY KEY(RangeSetsID,EventRangesID)"},
459  "WITHOUT ROWID");
460 }
461 
// Record the current output-file status under the file mutex; the stored
// value is later passed to the FileStatsCollector updates in
// writeSubRun/writeRun.
void RootDAQOutFile::setFileStatus(OutputFileStatus const ofs)
{
  std::lock_guard sentry{mutex_};
  status_ = ofs;
}
467 
// Return the name of the file currently being written (as passed to the
// constructor), under the file mutex.
string const&
RootDAQOutFile::currentFileName() const
{
  std::lock_guard sentry{mutex_};
  return file_;
}
474 
475 void RootDAQOutFile::selectProducts()
476 {
477  std::lock_guard sentry{mutex_};
478  auto selectProductsToWrite = [this](BranchType const bt) {
479  auto& items = selectedOutputItemList_[bt];
480  for (auto const& pd : om_->keptProducts()[bt] | ranges::views::values)
481  {
482  // Persist Results products only if they have been produced by
483  // the current process.
484  if (bt == InResults && !pd.produced())
485  {
486  continue;
487  }
488  checkDictionaries(pd);
489  // Although the transient flag is already checked when
490  // OutputModule::doSelectProducts is called, it can be flipped
491  // to 'true' after the BranchDescription transients have been
492  // fluffed, which happens during the checkDictionaries call.
493  if (pd.transient())
494  {
495  continue;
496  }
497  items.try_emplace(pd.productID(), pd);
498  }
499  for (auto const& item : items | ranges::views::values)
500  {
501  treePointers_[bt]->addOutputBranch(item.branchDescription,
502  item.product);
503  }
504  };
505  for_each_branch_type(selectProductsToWrite);
506 }
507 
508 void RootDAQOutFile::beginInputFile(RootFileBlock const* rfb,
509  FastCloningEnabled fastCloningEnabled)
510 {
511  std::lock_guard sentry{mutex_};
512 
513  // Create output branches, and then redo calculation to determine
514  // if fast cloning should be done.
515  selectProducts();
516 
517  cet::exempt_ptr<TTree const> inputTree{nullptr};
518  if (rfb)
519  {
520  if (rfb->fileFormatVersion().value_ < 10)
521  {
522  fastCloningEnabled.disable(
523  "The input file has a different ProductID "
524  "schema than the in-memory schema.");
525  }
526  inputTree = rfb->tree();
527  if (inputTree)
528  {
529  if (!treePointers_[InEvent]->checkSplitLevelAndBasketSize(inputTree))
530  {
531  fastCloningEnabled.disable(
532  "The splitting level and/or basket size does not match between "
533  "input and output file.");
534  }
535  if (inputTree->GetCurrentFile()->GetVersion() < 60001)
536  {
537  fastCloningEnabled.disable(
538  "The ROOT version used to write the input file (< 6.00/01)\nhas a "
539  "different splitting policy.");
540  }
541  }
542  }
543 
544  if (not fastCloningEnabled)
545  {
546  mf::LogWarning("FastCloning") << fastCloningEnabled.disabledBecause();
547  return;
548  }
549 
550  mf::LogInfo("FastCloning")
551  << "Fast cloning event data products from input file.";
552  wasFastCloned_ = treePointers_[InEvent]->fastCloneTree(inputTree);
553 }
554 
// Tell the file-properties tracker that a new input file has started,
// under the file mutex.
void RootDAQOutFile::incrementInputFileNumber()
{
  std::lock_guard sentry{mutex_};
  fp_.update_inputFile();
}
560 
561 void RootDAQOutFile::respondToCloseInputFile(FileBlock const&)
562 {
563  std::lock_guard sentry{mutex_};
564  cet::for_all(treePointers_, [](auto const& p) { p->setEntries(); });
565 }
566 
567 bool RootDAQOutFile::requestsToCloseFile()
568 {
569  std::lock_guard sentry{mutex_};
570  using namespace std::chrono;
571  unsigned int constexpr oneK{1024u};
572  fp_.updateSize(filePtr_->GetSize() / oneK);
573  fp_.updateAge(duration_cast<seconds>(steady_clock::now() - beginTime_));
574  return fileSwitchCriteria_.should_close(fp_);
575 }
576 
577 void RootDAQOutFile::writeOne(EventPrincipal const& e)
578 {
579  std::lock_guard sentry{mutex_};
580  TLOG(TLVL_DEBUG + 33) << "Start of RootDAQOutFile::writeOne";
581  // Note: The pEventAux_ must be set before calling fillBranches
582  // since it gets written out in that routine.
583  pEventAux_ = &e.eventAux();
584  // Because getting the data may cause an exception to be thrown we
585  // want to do that first before writing anything to the file about
586  // this event.
587  fillBranches<InEvent>(e, pEventProductProvenanceVector_);
588 
589  // Add the dataType to the job report if it hasn't already been done
590  if (!dataTypeReported_)
591  {
592  string dataType{"MC"};
593  if (pEventAux_->isRealData())
594  {
595  dataType = "Data";
596  }
597  dataTypeReported_ = true;
598  }
599  // Add event to index
600  fileIndex_.addEntry(pEventAux_->eventID(), fp_.eventEntryNumber());
601  fp_.update_event();
602  TLOG(TLVL_DEBUG + 33) << "End of RootDAQOutFile::writeOne";
603 }
604 
// Write one subrun.  The auxiliary pointer must be set — and stamped with
// the range-set ID previously registered via
// setSubRunAuxiliaryRangeSetID — before fillBranches runs, since
// fillBranches writes the auxiliary to the tree.
void RootDAQOutFile::writeSubRun(SubRunPrincipal const& sr)
{
  std::lock_guard sentry{mutex_};
  pSubRunAux_ = &sr.subRunAux();
  pSubRunAux_->setRangeSetID(subRunRSID_);
  fillBranches<InSubRun>(sr, pSubRunProductProvenanceVector_);
  // Record the subrun in the file index and update file statistics.
  fileIndex_.addEntry(EventID::invalidEvent(pSubRunAux_->subRunID()),
                      fp_.subRunEntryNumber());
  fp_.update_subRun(status_);
}
615 
// Write one run.  The auxiliary pointer must be set — and stamped with
// the range-set ID previously registered via setRunAuxiliaryRangeSetID —
// before fillBranches runs, since fillBranches writes the auxiliary to
// the tree.
void RootDAQOutFile::writeRun(RunPrincipal const& r)
{
  std::lock_guard sentry{mutex_};
  pRunAux_ = &r.runAux();
  pRunAux_->setRangeSetID(runRSID_);
  fillBranches<InRun>(r, pRunProductProvenanceVector_);
  // Record the run in the file index and update file statistics.
  fileIndex_.addEntry(EventID::invalidEvent(pRunAux_->runID()),
                      fp_.runEntryNumber());
  fp_.update_run(status_);
}
626 
// Write the entire ParentageRegistry to the parentage tree: one entry per
// (ParentageID, Parentage) pair.  ROOT's TTree::Branch stores the address
// of the *pointer* variable, so 'hash' and 'desc' are re-pointed at each
// registry entry before every Fill(); the branch addresses are detached
// at the end so the tree does not keep pointers to these stack variables.
void RootDAQOutFile::writeParentageRegistry()
{
  std::lock_guard sentry{mutex_};
  auto pid = root::getObjectRequireDict<ParentageID>();
  ParentageID const* hash = &pid;
  if (!parentageTree_->Branch(
        rootNames::parentageIDBranchName().c_str(), &hash, basketSize_, 0))
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to create a branch for ParentageIDs in the output file";
  }
  hash = nullptr;
  auto par = root::getObjectRequireDict<Parentage>();
  Parentage const* desc = &par;
  if (!parentageTree_->Branch(
        rootNames::parentageBranchName().c_str(), &desc, basketSize_, 0))
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to create a branch for Parentages in the output file";
  }
  desc = nullptr;
  // One tree entry per registry element.
  for (auto const& pr : ParentageRegistry::get())
  {
    hash = &pr.first;
    desc = &pr.second;
    parentageTree_->Fill();
  }
  parentageTree_->SetBranchAddress(rootNames::parentageIDBranchName().c_str(),
                                   nullptr);
  parentageTree_->SetBranchAddress(rootNames::parentageBranchName().c_str(),
                                   nullptr);
}
659 
660 void RootDAQOutFile::writeFileFormatVersion()
661 {
662  std::lock_guard sentry{mutex_};
663  FileFormatVersion const ver{getFileFormatVersion(), getFileFormatEra()};
664  auto const* pver = &ver;
665  TBranch* b = metaDataTree_->Branch(
666  metaBranchRootName<FileFormatVersion>(), &pver, basketSize_, 0);
667  // FIXME: Turn this into a throw!
668  assert(b);
669  b->Fill();
670 }
671 
672 void RootDAQOutFile::writeFileIndex()
673 {
674  std::lock_guard sentry{mutex_};
675  fileIndex_.sortBy_Run_SubRun_Event();
676  FileIndex::Element elem{};
677  auto const* findexElemPtr = &elem;
678  TBranch* b = fileIndexTree_->Branch(
679  metaBranchRootName<FileIndex::Element>(), &findexElemPtr, basketSize_, 0);
680  // FIXME: Turn this into a throw!
681  assert(b);
682  for (auto& entry : fileIndex_)
683  {
684  findexElemPtr = &entry;
685  b->Fill();
686  }
687  b->SetAddress(0);
688 }
689 
// Intentionally a no-op.  We don't do this yet; currently we're storing a
// slightly bloated ProcessHistoryRegistry instead (see
// writeProcessHistoryRegistry).
void RootDAQOutFile::writeProcessConfigurationRegistry()
{
}
695 
696 void RootDAQOutFile::writeProcessHistoryRegistry()
697 {
698  std::lock_guard sentry{mutex_};
699  ProcessHistoryMap pHistMap;
700  for (auto const& pr : ProcessHistoryRegistry::get())
701  {
702  pHistMap.emplace(pr);
703  }
704  auto const* p = &pHistMap;
705  TBranch* b = metaDataTree_->Branch(
706  metaBranchRootName<ProcessHistoryMap>(), &p, basketSize_, 0);
707  if (b == nullptr)
708  {
709  throw Exception(errors::LogicError) // NOLINT(cert-err60-cpp)
710  << "Unable to locate required "
711  "ProcessHistoryMap branch in output "
712  "metadata tree.\n";
713  }
714  b->Fill();
715 }
716 
// Populate the in-file "FileCatalog_metadata" SQLite table with
// (Name, Value) pairs: first the caller-supplied metadata 'md', then
// values derived here (file format, start/end times, run/subrun list,
// event counts, parents, first/last event, format era/version), and
// finally the stream-specific overrides 'ssmd'.
//
// NOTE(review): several values are emitted as JSON-ish strings built by
// hand (e.g. "runs", "parents"); their exact formatting is relied upon
// downstream, so do not alter the string construction.
void RootDAQOutFile::writeFileCatalogMetadata(
  FileStatsCollector const& stats,
  FileCatalogMetadata::collection_type const& md,
  FileCatalogMetadata::collection_type const& ssmd)
{
  std::lock_guard sentry{mutex_};
  using namespace cet::sqlite;
  // 'true' => overwrite any existing table contents.
  Ntuple<string, string> fileCatalogMetadata{
    *rootFileDB_, "FileCatalog_metadata", {{"Name", "Value"}}, true};
  for (auto const& [key, value] : md)
  {
    fileCatalogMetadata.insert(key, value);
  }

  // Add our own specific information: File format and friends.
  fileCatalogMetadata.insert("file_format", "\"artroot\"");

  // File start time.
  namespace bpt = boost::posix_time;
  auto formatted_time = [](auto const& t) {
    return cet::canonical_string(bpt::to_iso_extended_string(t));
  };
  fileCatalogMetadata.insert("start_time",
                             formatted_time(stats.outputFileOpenTime()));
  // File "end" time: now, since file is not actually closed yet.
  fileCatalogMetadata.insert(
    "end_time",
    formatted_time(boost::posix_time::second_clock::universal_time()));
  // Run/subRun information.
  if (!stats.seenSubRuns().empty())
  {
    // The run type comes from the (reverse-searched) caller metadata.
    auto I = find_if(md.crbegin(), md.crend(), [](auto const& p) {
      return p.first == "art.run_type";
    });
    if (I != md.crend())
    {
      std::ostringstream buf;
      buf << "[ ";
      for (auto const& srid : stats.seenSubRuns())
      {
        buf << "[ " << srid.run() << ", " << srid.subRun() << ", "
            << cet::canonical_string(I->second) << " ], ";
      }
      // Rewind over last delimiter.
      buf.seekp(-2, std::ios_base::cur);
      buf << " ]";
      fileCatalogMetadata.insert("runs", buf.str());
    }
  }
  // Number of events.
  fileCatalogMetadata.insert("event_count",
                             std::to_string(stats.eventsThisFile()));
  fileCatalogMetadata.insert("first_event",
                             std::to_string(stats.lowestEventID().event()));
  fileCatalogMetadata.insert("last_event",
                             std::to_string(stats.highestEventID().event()));
  // File parents.
  if (!stats.parents().empty())
  {
    std::ostringstream pstring;
    pstring << "[ ";
    for (auto const& parent : stats.parents())
    {
      pstring << cet::canonical_string(parent) << ", ";
    }
    // Rewind over last delimiter.
    pstring.seekp(-2, std::ios_base::cur);
    pstring << " ]";
    fileCatalogMetadata.insert("parents", pstring.str());
  }

  // The following need to be encapsulated in an art table
  // first_event and last_event.
  auto eidToTuple = [](EventID const& eid) -> string {
    std::ostringstream eidStr;
    eidStr << "[ " << eid.run() << ", " << eid.subRun() << ", " << eid.event()
           << " ]";
    return eidStr.str();
  };
  fileCatalogMetadata.insert("art.first_event",
                             eidToTuple(stats.lowestEventID()));
  fileCatalogMetadata.insert("art.last_event",
                             eidToTuple(stats.highestEventID()));
  fileCatalogMetadata.insert("art.file_format_era",
                             cet::canonical_string(getFileFormatEra()));
  fileCatalogMetadata.insert("art.file_format_version",
                             std::to_string(getFileFormatVersion()));

  // Incoming stream-specific metadata overrides.
  for (auto const& [key, value] : ssmd)
  {
    fileCatalogMetadata.insert(key, value);
  }
}
811 
// Export the full FHiCL ParameterSetRegistry into the in-file SQLite
// database, under the file mutex.
void RootDAQOutFile::writeParameterSetRegistry()
{
  std::lock_guard sentry{mutex_};
  fhicl::ParameterSetRegistry::exportTo(*rootFileDB_);
}
817 
818 void RootDAQOutFile::writeProductDescriptionRegistry()
819 {
820  std::lock_guard sentry{mutex_};
821  // Make a local copy of the UpdateOutputCallbacks's ProductList,
822  // removing any transient or pruned products.
823  ProductRegistry reg;
824  auto productDescriptionsToWrite = [this, &reg](BranchType const bt) {
825  for (auto const& desc :
826  descriptionsToPersist_[bt] | ranges::views::values)
827  {
828  reg.productList_.emplace(BranchKey{desc}, desc);
829  }
830  };
831  for_each_branch_type(productDescriptionsToWrite);
832  ProductRegistry const* regp = &reg;
833  TBranch* b = metaDataTree_->Branch(
834  metaBranchRootName<ProductRegistry>(), &regp, basketSize_, 0);
835  // FIXME: Turn this into a throw!
836  assert(b);
837  b->Fill();
838 }
839 
840 void RootDAQOutFile::writeProductDependencies()
841 {
842  std::lock_guard sentry{mutex_};
843  BranchChildren const* ppDeps = &om_->branchChildren();
844  TBranch* b = metaDataTree_->Branch(
845  metaBranchRootName<BranchChildren>(), &ppDeps, basketSize_, 0);
846  // FIXME: Turn this into a throw!
847  assert(b);
848  b->Fill();
849 }
850 
// Write the Results products.  As with the other write* methods, the
// auxiliary pointer must be set before fillBranches runs, since
// fillBranches writes the auxiliary to the tree.
void RootDAQOutFile::writeResults(ResultsPrincipal& resp)
{
  std::lock_guard sentry{mutex_};
  pResultsAux_ = &resp.resultsAux();
  fillBranches<InResults>(resp, pResultsProductProvenanceVector_);
}
857 
858 void RootDAQOutFile::writeTTrees()
859 {
860  TLOG(TLVL_DEBUG + 33) << "Start of RootDAQOutFile::writeTTrees";
861  std::lock_guard sentry{mutex_};
862  RootOutputTree::writeTTree(metaDataTree_);
863  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::writeTTrees after writing metaDataTree_";
864  RootOutputTree::writeTTree(fileIndexTree_);
865  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::writeTTrees after writing fileIndexTree_";
866  RootOutputTree::writeTTree(parentageTree_);
867  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::writeTTrees after writing parentageTree_";
868  for_each_branch_type(
869  [this](BranchType const bt) { treePointers_[bt]->writeTree(); });
870  TLOG(TLVL_DEBUG + 33) << "End of RootDAQOutFile::writeTTrees";
871 }
872 
873 void RootDAQOutFile::setSubRunAuxiliaryRangeSetID(RangeSet const& ranges)
874 {
875  std::lock_guard sentry{mutex_};
876  subRunRSID_ = getNewRangeSetID(*rootFileDB_, InSubRun, ranges.run());
877  insertIntoEventRanges(*rootFileDB_, ranges);
878  auto const& eventRangesIDs = getExistingRangeSetIDs(*rootFileDB_, ranges);
879  insertIntoJoinTable(*rootFileDB_, InSubRun, subRunRSID_, eventRangesIDs);
880 }
881 
882 void RootDAQOutFile::setRunAuxiliaryRangeSetID(RangeSet const& ranges)
883 {
884  std::lock_guard sentry{mutex_};
885  runRSID_ = getNewRangeSetID(*rootFileDB_, InRun, ranges.run());
886  insertIntoEventRanges(*rootFileDB_, ranges);
887  auto const& eventRangesIDs = getExistingRangeSetIDs(*rootFileDB_, ranges);
888  insertIntoJoinTable(*rootFileDB_, InRun, runRSID_, eventRangesIDs);
889 }
890 
891 template<BranchType BT>
892 EDProduct const*
893 RootDAQOutFile::getProduct(OutputHandle const& oh,
894  RangeSet const& prunedProductRS,
895  string const& wrappedName)
896 {
897  std::lock_guard sentry{mutex_};
898  if constexpr (detail::range_sets_supported(BT))
899  {
900  if (!prunedProductRS.is_valid())
901  {
902  return dummyProductCache_.product(wrappedName);
903  }
904  }
905  return oh.isValid() ? oh.wrapper() : dummyProductCache_.product(wrappedName);
906 }
907 
// Populate the per-branch product pointers and the provenance vector for
// one tree (event, subrun, run, or results), then fill the tree.  Selected
// products are resolved from the principal (unless their branch was fast-
// cloned), their provenance is collected according to the configured
// DropMetaData policy, and (sub)run products whose range set was
// invalidated are replaced by dummy products with a special status so
// readers can tell they were suppressed to avoid double counting.
// NOTE(review): vpp is used as scratch space — it is filled, validated,
// consumed by fillTree(), and cleared before return.
template<BranchType BT>
void RootDAQOutFile::fillBranches(Principal const& principal,
                                  vector<ProductProvenance>* vpp)
{
  TLOG(TLVL_DEBUG + 33) << "Start of RootDAQOutFile::fillBranches";
  std::lock_guard sentry{mutex_};
  // Fast cloning only ever applies to the event tree.
  bool const fastCloning{BT == InEvent && wasFastCloned_};
  // Maps a range-set checksum to its database row ID so identical range
  // sets written for several products share one entry.
  map<unsigned, unsigned> checksumToIndex;
  auto const& principalRS = principal.seenRanges();

  // Local variables to avoid many function calls to
  // DropMetaData::operator==().
  bool const drop_no_metadata{dropMetaData_ == DropMetaData::DropNone};
  bool const drop_prior_metadata{dropMetaData_ == DropMetaData::DropPrior};
  bool const drop_all_metadata{dropMetaData_ == DropMetaData::DropAll};

  std::set<ProductProvenance> keptprv;
  for (auto const& [pid, val] : selectedOutputItemList_[BT])
  {
    auto const& bd = val.branchDescription;
    descriptionsToPersist_[BT].try_emplace(pid, bd);
    bool const produced = bd.produced();
    // A product must be resolved (read into memory) unless its branch was
    // carried over verbatim by fast cloning.
    bool const resolveProd{produced || !fastCloning ||
                           treePointers_[BT]->uncloned(bd.branchName())};
    // Update the kept provenance: keep everything under DropNone; under
    // DropPrior keep only provenance of products produced in this process.
    bool const keepProvenance =
      drop_no_metadata || (produced && drop_prior_metadata);
    auto const& oh = principal.getForOutput(pid, resolveProd);
    // Iterator into keptprv for this product's provenance entry; only
    // meaningful after one of the insert/emplace calls below assigns it.
    auto prov = keptprv.begin();
    if (keepProvenance)
    {
      if (oh.productProvenance())
      {
        prov = keptprv.insert(*oh.productProvenance()).first;
        if (!drop_all_metadata && !dropMetaDataForDroppedData_)
        {
          {
            // Depth-first walk over the parentage graph, collecting the
            // provenance (and branch descriptions) of all ancestors.
            vector<ProductProvenance const*> stacked_pp;
            stacked_pp.push_back(&*oh.productProvenance());
            while (not empty(stacked_pp))
            {
              auto current_pp = stacked_pp.back();
              stacked_pp.pop_back();
              for (auto const parent_bid :
                   current_pp->parentage().parents())
              {
                // Note: Suppose the parent ProductID corresponds to
                //       product that has been requested to be
                //       "dropped"--i.e. someone has specified "drop
                //       *_m1a_*_*" in their configuration, and
                //       although a given product matching this
                //       pattern will not be included in the
                //       selectedProducts_ list, one of the parents of
                //       a selected product can match the "dropping"
                //       pattern and its BranchDescription will still
                //       be written to disk since it is inserted into
                //       the descriptionsToPersist_ data member.
                auto parent_bd = principal.getProductDescription(parent_bid);
                if (!parent_bd)
                {
                  // FIXME: Is this an error condition?
                  continue;
                }
                descriptionsToPersist_[BT].try_emplace(parent_bid,
                                                       *parent_bd);
                if (!parent_bd->produced())
                {
                  // We got it from the input, nothing to do.
                  continue;
                }
                auto parent_pp =
                  principal.branchToProductProvenance(parent_bid);
                // Ancestor provenance is kept only under DropNone.
                if (!parent_pp || !drop_no_metadata)
                {
                  continue;
                }
                if (!keptprv.insert(*parent_pp).second)
                {
                  // Already there, done.
                  continue;
                }
                if (!drop_all_metadata && !dropMetaDataForDroppedData_)
                {
                  // Newly seen ancestor: recurse into its parents too.
                  stacked_pp.push_back(parent_pp.get());
                }
              }
            }
          }
        }
      }
      else
      {
        // No provenance: product was either not produced, or was
        // dropped; create provenance to remember that.
        auto status = productstatus::dropped();
        if (produced)
        {
          status = productstatus::neverCreated();
        }
        prov = keptprv.emplace(pid, status).first;
      }
    }
    // Resolve the product if we are going to attempt to write it out.
    if (resolveProd)
    {
      // Product was either produced, or we are not cloning the whole
      // file and the product branch was not cloned so we should be
      // able to get a pointer to it from the passed principal and
      // write it out.
      auto const& rs = getRangeSet<BT>(oh, principalRS, produced);
      if (detail::range_sets_supported(BT) && !rs.is_valid())
      {
        // At this point we are now going to write out a dummy product
        // whose Wrapper present flag is false because the range set
        // got invalidated to prevent double counting when combining
        // run or subrun products from multiple fragments. We change
        // the provenance status that we are going to write out to
        // dummyToPreventDoubleCount to flag this case. Note that the
        // requirement is only that the status not be
        // productstatus::present(). We use a special code to make it
        // easier for humans to tell what is going on.
        // std::set elements are immutable, so replacing the status
        // requires an erase followed by a fresh emplace.
        auto prov_bid = prov->productID();
        if (keptprv.erase(*prov) != 1ull)
        {
          throw Exception(errors::LogicError, "KeptProvenance::setStatus")
            << "Attempt to set product status for product whose provenance "
               "is not being recorded.\n";
        }
        prov =
          keptprv
            .emplace(prov_bid, productstatus::dummyToPreventDoubleCount())
            .first;
      }
      auto const* product = getProduct<BT>(oh, rs, bd.wrappedName());
      // Record (and deduplicate via checksumToIndex) this product's range
      // set in the SQLite sidecar DB and stamp the ID onto the product.
      setProductRangeSetID<BT>(
        rs, *rootFileDB_, const_cast<EDProduct*>(product), checksumToIndex);
      val.product = product;
    }
  }
  vpp->assign(keptprv.begin(), keptprv.end());
  // Sanity check: every provenance entry about to be written must have a
  // definite status.
  for (auto const& val : *vpp)
  {
    if (val.productStatus() == productstatus::uninitialized())
    {
      throw Exception(errors::LogicError,
                      "RootDAQOutFile::fillBranches(principal, vpp):")
        << "Attempt to write a product with uninitialized provenance!\n";
    }
  }

  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::fillBranches before fillTree call";
  treePointers_[BT]->fillTree();
  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::fillBranches after fillTree call";
  vpp->clear();
  TLOG(TLVL_DEBUG + 33) << "End of RootDAQOutFile::fillBranches";
}
1064 
1065 } // namespace art