artdaq  3.12.07
RootDAQOutFile.cc
1 #include "artdaq/ArtModules/RootDAQOutput-s124/RootDAQOutFile.h"
2 #include "TRACE/tracemf.h"
3 // vim: set sw=2 expandtab :
4 
5 #include "artdaq/DAQdata/Globals.hh"
6 
7 #include "art/Framework/Core/OutputFileGranularity.h"
8 #include "art/Framework/Core/OutputModule.h"
9 #include "art/Framework/IO/ClosingCriteria.h"
10 #include "art/Framework/IO/FileStatsCollector.h"
11 #include "art/Framework/Principal/EventPrincipal.h"
12 #include "art/Framework/Principal/RangeSetsSupported.h"
13 #include "art/Framework/Principal/ResultsPrincipal.h"
14 #include "art/Framework/Principal/RunPrincipal.h"
15 #include "art/Framework/Principal/SubRunPrincipal.h"
16 #include "art/Framework/Services/Registry/ServiceHandle.h"
17 #include "art/Framework/Services/System/DatabaseConnection.h"
18 #include "art/Persistency/Provenance/ProcessHistoryRegistry.h"
19 #include "art_root_io/DropMetaData.h"
20 #include "art_root_io/GetFileFormatEra.h"
21 #include "art_root_io/GetFileFormatVersion.h"
22 #include "art_root_io/RootDB/TKeyVFSOpenPolicy.h"
23 #include "art_root_io/RootFileBlock.h"
24 #include "art_root_io/checkDictionaries.h"
25 #include "art_root_io/detail/getObjectRequireDict.h"
26 #include "boost/date_time/posix_time/posix_time.hpp"
27 #include "canvas/Persistency/Provenance/BranchChildren.h"
28 #include "canvas/Persistency/Provenance/BranchType.h"
29 #include "canvas/Persistency/Provenance/EventAuxiliary.h"
30 #include "canvas/Persistency/Provenance/EventID.h"
31 #include "canvas/Persistency/Provenance/FileFormatVersion.h"
32 #include "canvas/Persistency/Provenance/Parentage.h"
33 #include "canvas/Persistency/Provenance/ParentageRegistry.h"
34 #include "canvas/Persistency/Provenance/ProductStatus.h"
35 #include "canvas/Persistency/Provenance/ResultsAuxiliary.h"
36 #include "canvas/Persistency/Provenance/RunAuxiliary.h"
37 #include "canvas/Persistency/Provenance/SubRunAuxiliary.h"
38 #include "canvas/Persistency/Provenance/rootNames.h"
39 #include "canvas/Utilities/Exception.h"
40 #include "canvas_root_io/Utilities/DictionaryChecker.h"
41 #include "cetlib/canonical_string.h"
42 #include "cetlib/container_algorithms.h"
43 #include "cetlib/exempt_ptr.h"
44 #include "cetlib/sqlite/Ntuple.h"
45 #include "cetlib/sqlite/Transaction.h"
46 #include "cetlib/sqlite/create_table.h"
47 #include "cetlib/sqlite/exec.h"
48 #include "cetlib/sqlite/insert.h"
49 #include "fhiclcpp/ParameterSetRegistry.h"
50 #include "messagefacility/MessageLogger/MessageLogger.h"
51 #include "range/v3/view.hpp"
52 
53 #include "TBranch.h"
54 
55 #define TRACE_NAME (app_name + "_RootDAQOutFile").c_str()
56 
57 #include "TFile.h"
58 #include "TTree.h"
59 
60 #include <fcntl.h> // posix_fadvise POSIX_FADV_DONTNEED
61 #include <sys/sysinfo.h> // sysinfo(sysinfo*)
62 #include <algorithm>
63 #include <utility>
64 #include <vector>
65 
66 using namespace cet;
67 using namespace hep::concurrency;
68 
69 using art::BranchType;
71 using art::rootNames::metaBranchRootName;
72 
73 using std::map;
74 using std::string;
75 using std::vector;
76 
77 namespace {
78 
79 void create_table(sqlite3* const db,
80  string const& name,
81  vector<string> const& columns,
82  string const& suffix = {})
83 {
84  if (columns.empty())
85  {
86  throw art::Exception(art::errors::LogicError) // NOLINT(cert-err60-cpp)
87  << "Number of sqlite columns specified for table: " << name << '\n'
88  << "is zero.\n";
89  }
90  string ddl = "DROP TABLE IF EXISTS " + name +
91  "; "
92  "CREATE TABLE " +
93  name + "(" + columns.front();
94  for_each(columns.begin() + 1, columns.end(), [&ddl](auto const& col) {
95  ddl += "," + col;
96  });
97  ddl += ") ";
98  ddl += suffix;
99  ddl += ";";
100  sqlite::exec(db, ddl);
101 }
102 
103 void insert_eventRanges_row(sqlite3_stmt* stmt,
104  art::SubRunNumber_t const sr,
105  art::EventNumber_t const b,
106  art::EventNumber_t const e)
107 {
108  sqlite3_bind_int64(stmt, 1, sr);
109  sqlite3_bind_int64(stmt, 2, b);
110  sqlite3_bind_int64(stmt, 3, e);
111  sqlite3_step(stmt);
112  sqlite3_reset(stmt);
113 }
114 
// Insert one (RangeSetsID, EventRangesID) row into a <branch-type>
// RangeSets_EventRanges join table via a prepared statement, then reset
// the statement for reuse.
void insert_rangeSets_eventSets_row(sqlite3_stmt* stmt,
                                    unsigned const rsid,
                                    unsigned const esid)
{
  sqlite3_bind_int64(stmt, 1, rsid);
  sqlite3_bind_int64(stmt, 2, esid);
  sqlite3_step(stmt);
  sqlite3_reset(stmt);
}
124 
125 unsigned
126 getNewRangeSetID(sqlite3* db,
127  art::BranchType const bt,
128  art::RunNumber_t const r)
129 {
130  sqlite::insert_into(db, art::BranchTypeToString(bt) + "RangeSets")
131  .values(r);
132  return sqlite3_last_insert_rowid(db);
133 }
134 
135 vector<unsigned>
136 getExistingRangeSetIDs(sqlite3* db, art::RangeSet const& rs)
137 {
138  using namespace std;
139  vector<unsigned> rangeSetIDs;
140  cet::transform_all(rs, back_inserter(rangeSetIDs), [db](auto const& range) {
141  sqlite::query_result<unsigned> r;
142  r << sqlite::select("ROWID")
143  .from(db, "EventRanges")
144  .where("SubRun=" + to_string(range.subRun()) +
145  " AND "
146  "begin=" +
147  to_string(range.begin()) +
148  " AND "
149  "end=" +
150  to_string(range.end()));
151  return unique_value(r);
152  });
153  return rangeSetIDs;
154 }
155 
156 void insertIntoEventRanges(sqlite3* db, art::RangeSet const& rs)
157 {
158  sqlite::Transaction txn{db};
159  sqlite3_stmt* stmt{nullptr};
160  string const ddl{
161  "INSERT INTO EventRanges(SubRun, begin, end) "
162  "VALUES(?, ?, ?);"};
163  sqlite3_prepare_v2(db, ddl.c_str(), -1, &stmt, nullptr);
164  for (auto const& range : rs)
165  {
166  insert_eventRanges_row(stmt, range.subRun(), range.begin(), range.end());
167  }
168  sqlite3_finalize(stmt);
169  txn.commit();
170 }
171 
172 void insertIntoJoinTable(sqlite3* db,
173  art::BranchType const bt,
174  unsigned const rsID,
175  vector<unsigned> const& eventRangesIDs)
176 {
177  sqlite::Transaction txn{db};
178  sqlite3_stmt* stmt{nullptr};
179  string const ddl{
180  "INSERT INTO " + art::BranchTypeToString(bt) +
181  "RangeSets_EventRanges(RangeSetsID, EventRangesID) Values(?,?);"};
182  sqlite3_prepare_v2(db, ddl.c_str(), -1, &stmt, nullptr);
183  cet::for_all(eventRangesIDs, [stmt, rsID](auto const eventRangeID) {
184  insert_rangeSets_eventSets_row(stmt, rsID, eventRangeID);
185  });
186  sqlite3_finalize(stmt);
187  txn.commit();
188 }
189 
190 void maybeInvalidateRangeSet(BranchType const bt,
191  art::RangeSet const& principalRS,
192  art::RangeSet& productRS)
193 {
194  assert(principalRS.is_sorted());
195  assert(productRS.is_sorted());
196  if (!productRS.is_valid())
197  {
198  return;
199  }
200  if (bt == art::InRun && productRS.is_full_run())
201  {
202  return;
203  }
204  if (bt == art::InSubRun && productRS.is_full_subRun())
205  {
206  return;
207  }
208  if (productRS.ranges().empty())
209  {
210  return;
211  }
212  auto const r = productRS.run();
213  auto const& productFront = productRS.ranges().front();
214  if (!principalRS.contains(r, productFront.subRun(), productFront.begin()))
215  {
216  productRS = art::RangeSet::invalid();
217  }
218 }
219 
220 // The purpose of 'maybeInvalidateRangeSet' is to support the
221 // following situation. Suppose process 1 creates three files with
222 // one Run product each, all corresponding to the same Run. Let's
223 // call the individual Run product instances in the three separate
224 // files as A, B, and C. Now suppose that the three files serve as
225 // inputs to process 2, where a concatenation is being performed AND
226 // ALSO an output file switch. Process 2 results in two output
227 // files, and now, in process 3, we concatenate the outputs from
228 // process 2. The situation would look like this:
229 //
230 // Process 1: [A] [B] [C]
231 // \ / \ /
232 // Process 2: [A + B] [B + C]
233 // \ / \ /
234 // D=agg(A,B) | | E=agg(B,C)
235 // \ /
236 // Process 3: [D + E]
237 //
238 // Notice the complication in process 3: product 'B' will be
239 // aggregated twice: once with A, and once with C. Whenever the
240 // output from process 3 is read as input to another process, the
241 // fetched product will be equivalent to A+2B+C.
242 //
243 // To avoid this situation, we compare the RangeSet of the product
244 // with the RangeSet of the in-memory RunAuxiliary. If the
245 // beginning of B's RangeSet is not contained within the auxiliary's
246 // RangeSet, then a dummy product with an invalid RangeSet is
247 // written to disk. Instead of the diagram above, we have:
248 //
249 // Process 1: [A] [B] [C]
250 // \ / \ /
251 // Process 2: [A + B] [x + C]
252 // \ / \ /
253 // D=agg(A,B) | | E=agg(x,C)=C
254 // \ /
255 // Process 3: [D + E]
256 //
257 // where 'x' represent a dummy product. Upon aggregating D and E,
258 // we obtain the correctly formed A+B+C product.
259 template<BranchType BT>
260 art::RangeSet
261 getRangeSet(art::OutputHandle const& oh,
262  art::RangeSet const& principalRS,
263  bool const producedInThisProcess)
264 {
265  if constexpr (!art::detail::range_sets_supported(BT))
266  {
267  return art::RangeSet::invalid();
268  }
269 
270  auto rs = oh.isValid() ? oh.rangeOfValidity() : art::RangeSet::invalid();
271  // Because a user can specify (e.g.):
272  // r.put(std::move(myProd), art::runFragment(myRangeSet));
273  // products that are produced in this process can have valid, yet
274  // arbitrary RangeSets. We therefore never invalidate a RangeSet
275  // that corresponds to a product produced in this process.
276  //
277  // It is possible for a user to specify a RangeSet which does not
278  // correspond AT ALL to the in-memory auxiliary RangeSet. In that
279  // case, users should not expect to be able to retrieve products
280  // for which no corresponding events or sub-runs were processed.
281  if (!producedInThisProcess)
282  {
283  maybeInvalidateRangeSet(BT, principalRS, rs);
284  }
285  return rs;
286 }
287 
288 template<BranchType BT>
289 void setProductRangeSetID(art::RangeSet const& rs,
290  sqlite3* db,
291  art::EDProduct* product,
292  map<unsigned, unsigned>& checksumToIndexLookup)
293 {
294  if constexpr (!art::detail::range_sets_supported(BT))
295  {
296  return;
297  }
298 
299  if (!rs.is_valid())
300  { // Invalid range-sets not written to DB
301  return;
302  }
303  // Set range sets for SubRun and Run products
304  auto it = checksumToIndexLookup.find(rs.checksum());
305  if (it != checksumToIndexLookup.cend())
306  {
307  product->setRangeSetID(it->second);
308  }
309  else
310  {
311  unsigned const rsID = getNewRangeSetID(db, BT, rs.run());
312  product->setRangeSetID(rsID);
313  checksumToIndexLookup.emplace(rs.checksum(), rsID);
314  insertIntoEventRanges(db, rs);
315  auto const& eventRangesIDs = getExistingRangeSetIDs(db, rs);
316  insertIntoJoinTable(db, BT, rsID, eventRangesIDs);
317  }
318 }
319 
320 } // unnamed namespace
321 
322 namespace art {
323 
// Defaulted out of line: OutputItem needs no custom cleanup.
OutputItem::~OutputItem() = default;
325 
// Wrap a BranchDescription for output.  `product` starts null; its address
// is later handed to the output tree as the branch's fill address (see
// selectProducts), so it is presumably set per entry before filling —
// confirm against RootOutputTree.
OutputItem::OutputItem(BranchDescription const& bd)
  : branchDescription{bd}, product{nullptr}
{}
329 
// Construct the output file: open `fileName` for writing ("recreate") at
// the requested compression level, create the unsplit metadata/file-index/
// parentage TTrees and one RootOutputTree per branch type, open the
// embedded SQLite database "RootFileDB" inside the ROOT file, verify that
// dictionaries exist for all auxiliary types, and create the range-set
// tables.  `freePercent`/`freeMB` are thresholds consulted by the
// destructor when deciding whether to flush and drop the page cache.
RootDAQOutFile::RootDAQOutFile(OutputModule* om,
                               string const& fileName,
                               ClosingCriteria const& fileSwitchCriteria,
                               int const compressionLevel,
                               unsigned freePercent,
                               unsigned freeMB,
                               int64_t const saveMemoryObjectThreshold,
                               int64_t const treeMaxVirtualSize,
                               int const splitLevel,
                               int const basketSize,
                               DropMetaData dropMetaData,
                               bool const dropMetaDataForDroppedData)
  : om_{om}
  , file_{fileName}
  , fileSwitchCriteria_{fileSwitchCriteria}
  , compressionLevel_{compressionLevel}
  , freePercent_{freePercent}
  , freeMB_{freeMB}
  , saveMemoryObjectThreshold_{saveMemoryObjectThreshold}
  , treeMaxVirtualSize_{treeMaxVirtualSize}
  , splitLevel_{splitLevel}
  , basketSize_{basketSize}
  , dropMetaData_{dropMetaData}
  , dropMetaDataForDroppedData_{dropMetaDataForDroppedData}
  , filePtr_{TFile::Open(file_.c_str(), "recreate", "", compressionLevel)}
{
  using std::make_unique;
  // Don't split metadata tree or event description tree
  metaDataTree_ = RootOutputTree::makeTTree(
    filePtr_.get(), rootNames::metaDataTreeName(), 0);
  fileIndexTree_ = RootOutputTree::makeTTree(
    filePtr_.get(), rootNames::fileIndexTreeName(), 0);
  parentageTree_ = RootOutputTree::makeTTree(
    filePtr_.get(), rootNames::parentageTreeName(), 0);
  // Data trees indexed by branch type: 0=InEvent, 1=InSubRun, 2=InRun,
  // 3=InResults.  Each binds its auxiliary pointer and provenance vector,
  // which are filled by the write*/fillBranches methods below.
  treePointers_[0] =
    make_unique<RootOutputTree>(filePtr_.get(),
                                InEvent,
                                pEventAux_,
                                pEventProductProvenanceVector_,
                                basketSize,
                                splitLevel,
                                treeMaxVirtualSize,
                                saveMemoryObjectThreshold);
  treePointers_[1] =
    make_unique<RootOutputTree>(filePtr_.get(),
                                InSubRun,
                                pSubRunAux_,
                                pSubRunProductProvenanceVector_,
                                basketSize,
                                splitLevel,
                                treeMaxVirtualSize,
                                saveMemoryObjectThreshold);
  treePointers_[2] = make_unique<RootOutputTree>(filePtr_.get(),
                                                 InRun,
                                                 pRunAux_,
                                                 pRunProductProvenanceVector_,
                                                 basketSize,
                                                 splitLevel,
                                                 treeMaxVirtualSize,
                                                 saveMemoryObjectThreshold);
  treePointers_[3] =
    make_unique<RootOutputTree>(filePtr_.get(),
                                InResults,
                                pResultsAux_,
                                pResultsProductProvenanceVector_,
                                basketSize,
                                splitLevel,
                                treeMaxVirtualSize,
                                saveMemoryObjectThreshold);
  // Open the SQLite DB stored inside the ROOT file (TKeyVFS).  Newer art
  // versions assign the connection directly; older ones wrap it in
  // reset() — presumably a smart-pointer vs. value API change.
#if ART_HEX_VERSION > 0x31400
  rootFileDB_ = ServiceHandle<DatabaseConnection> {
  } -> get<TKeyVFSOpenPolicy>("RootFileDB",
                              filePtr_.get(),
                              SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE);
#else
  rootFileDB_.reset(ServiceHandle<DatabaseConnection> {
  } -> get<TKeyVFSOpenPolicy>("RootFileDB",
                              filePtr_.get(),
                              SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE));
#endif
  // Start the clock used by requestsToCloseFile() for the file-age
  // closing criterion.
  beginTime_ = std::chrono::steady_clock::now();
  // Check that dictionaries for the auxiliaries exist
  root::DictionaryChecker checker;
  checker.checkDictionaries<EventAuxiliary>();
  checker.checkDictionaries<SubRunAuxiliary>();
  checker.checkDictionaries<RunAuxiliary>();
  checker.checkDictionaries<ResultsAuxiliary>();
  checker.reportMissingDictionaries();

  createDatabaseTables();
  TLOG(TLVL_DEBUG + 32) << "RootDAQOutFile ctor complete";
}
422 
423 art::RootDAQOutFile::~RootDAQOutFile()
424 {
425  struct sysinfo info;
426  int sts = sysinfo(&info);
427  auto free_percent = static_cast<unsigned>(info.freeram * 100 / info.totalram);
428  auto free_MB = static_cast<unsigned>(info.freeram * info.mem_unit >> 20); // round down (1024.9 => 1024 MB)
429  TRACE(TLVL_DEBUG + 32, "~RootDAQOutFile free %%%u %.1fMB (%u) buffers=%fGB mem_unit=%u", // NOLINT
430  free_percent, static_cast<float>(info.freeram * info.mem_unit / (1024 * 1024.0)),
431  free_MB, static_cast<float>(info.bufferram * info.mem_unit / (1024 * 1024 * 1024.0)), info.mem_unit);
432  if (free_percent < freePercent_ || free_MB < freeMB_)
433  {
434  TLOG(TLVL_DEBUG + 32) << "RootDAQOutFile Flush/DONTNEED";
435  filePtr_->Flush();
436  sts = posix_fadvise(filePtr_->GetFd(), 0, 0 /*len,0=all*/, POSIX_FADV_DONTNEED);
437  }
438  TLOG(TLVL_DEBUG + 32) << "~RootDAQOutFile complete sts=" << sts;
439 }
440 
441 void art::RootDAQOutFile::createDatabaseTables()
442 {
443  // Event ranges
444  create_table(*rootFileDB_,
445  "EventRanges",
446  {"SubRun INTEGER",
447  "begin INTEGER",
448  "end INTEGER",
449  "UNIQUE (SubRun,begin,end) ON CONFLICT IGNORE"});
450  // SubRun range sets
451  using namespace cet::sqlite;
452  create_table(*rootFileDB_, "SubRunRangeSets", column<int>{"Run"});
453  create_table(*rootFileDB_,
454  "SubRunRangeSets_EventRanges",
455  {"RangeSetsID INTEGER",
456  "EventRangesID INTEGER",
457  "PRIMARY KEY(RangeSetsID,EventRangesID)"},
458  "WITHOUT ROWID");
459  // Run range sets
460  create_table(*rootFileDB_, "RunRangeSets", column<int>{"Run"});
461  create_table(*rootFileDB_,
462  "RunRangeSets_EventRanges",
463  {"RangeSetsID INTEGER",
464  "EventRangesID INTEGER",
465  "PRIMARY KEY(RangeSetsID,EventRangesID)"},
466  "WITHOUT ROWID");
467 }
468 
469 void RootDAQOutFile::setFileStatus(OutputFileStatus const ofs)
470 {
471  std::lock_guard sentry{mutex_};
472  status_ = ofs;
473 }
474 
475 string const&
476 RootDAQOutFile::currentFileName() const
477 {
478  std::lock_guard sentry{mutex_};
479  return file_;
480 }
481 
482 void RootDAQOutFile::selectProducts()
483 {
484  std::lock_guard sentry{mutex_};
485  auto selectProductsToWrite = [this](BranchType const bt) {
486  auto& items = selectedOutputItemList_[bt];
487  for (auto const& pd : om_->keptProducts()[bt] | ranges::views::values)
488  {
489  // Persist Results products only if they have been produced by
490  // the current process.
491  if (bt == InResults && !pd.produced())
492  {
493  continue;
494  }
495  checkDictionaries(pd);
496  // Although the transient flag is already checked when
497  // OutputModule::doSelectProducts is called, it can be flipped
498  // to 'true' after the BranchDescription transients have been
499  // fluffed, which happens during the checkDictionaries call.
500  if (pd.transient())
501  {
502  continue;
503  }
504  items.try_emplace(pd.productID(), pd);
505  }
506  for (auto const& item : items | ranges::views::values)
507  {
508  treePointers_[bt]->addOutputBranch(item.branchDescription,
509  item.product);
510  }
511  };
512  for_each_branch_type(selectProductsToWrite);
513 }
514 
// Called when a new input file is opened.  (Re)creates the output
// branches, then re-evaluates whether the event tree can be fast-cloned
// from the input, disabling fast cloning (with a logged reason) when the
// ProductID schema, split-level/basket-size settings, or the ROOT version
// that wrote the input are incompatible.
void RootDAQOutFile::beginInputFile(RootFileBlock const* rfb,
                                    FastCloningEnabled fastCloningEnabled)
{
  std::lock_guard sentry{mutex_};

  // Create output branches, and then redo calculation to determine
  // if fast cloning should be done.
  selectProducts();

  cet::exempt_ptr<TTree const> inputTree{nullptr};
  if (rfb)
  {
    // Files with file-format version < 10 use a different ProductID
    // schema, so their event tree cannot be cloned byte-for-byte.
    if (rfb->fileFormatVersion().value_ < 10)
    {
      fastCloningEnabled.disable(
        "The input file has a different ProductID "
        "schema than the in-memory schema.");
    }
    inputTree = rfb->tree();
    if (inputTree)
    {
      if (!treePointers_[InEvent]->checkSplitLevelAndBasketSize(inputTree))
      {
        fastCloningEnabled.disable(
          "The splitting level and/or basket size does not match between "
          "input and output file.");
      }
      // ROOT older than 6.00/01 (version code 60001) split differently.
      if (inputTree->GetCurrentFile()->GetVersion() < 60001)
      {
        fastCloningEnabled.disable(
          "The ROOT version used to write the input file (< 6.00/01)\nhas a "
          "different splitting policy.");
      }
    }
  }

  if (not fastCloningEnabled)
  {
    mf::LogWarning("FastCloning") << fastCloningEnabled.disabledBecause();
    return;
  }

  mf::LogInfo("FastCloning")
    << "Fast cloning event data products from input file.";
  wasFastCloned_ = treePointers_[InEvent]->fastCloneTree(inputTree);
}
561 
562 void RootDAQOutFile::incrementInputFileNumber()
563 {
564  std::lock_guard sentry{mutex_};
565  fp_.update_inputFile();
566 }
567 
568 void RootDAQOutFile::respondToCloseInputFile(FileBlock const&)
569 {
570  std::lock_guard sentry{mutex_};
571  cet::for_all(treePointers_, [](auto const& p) { p->setEntries(); });
572 }
573 
574 bool RootDAQOutFile::requestsToCloseFile()
575 {
576  std::lock_guard sentry{mutex_};
577  using namespace std::chrono;
578  unsigned int constexpr oneK{1024u};
579  fp_.updateSize(filePtr_->GetSize() / oneK);
580  fp_.updateAge(duration_cast<seconds>(steady_clock::now() - beginTime_));
581  return fileSwitchCriteria_.should_close(fp_);
582 }
583 
584 void RootDAQOutFile::writeOne(EventPrincipal const& e)
585 {
586  std::lock_guard sentry{mutex_};
587  TLOG(TLVL_DEBUG + 33) << "Start of RootDAQOutFile::writeOne";
588  // Note: The pEventAux_ must be set before calling fillBranches
589  // since it gets written out in that routine.
590  pEventAux_ = &e.eventAux();
591  // Because getting the data may cause an exception to be thrown we
592  // want to do that first before writing anything to the file about
593  // this event.
594  fillBranches<InEvent>(e, pEventProductProvenanceVector_);
595 
596  // Add the dataType to the job report if it hasn't already been done
597  if (!dataTypeReported_)
598  {
599  string dataType{"MC"};
600  if (pEventAux_->isRealData())
601  {
602  dataType = "Data";
603  }
604  dataTypeReported_ = true;
605  }
606  // Add event to index
607  fileIndex_.addEntry(pEventAux_->eventID(), fp_.eventEntryNumber());
608  fp_.update_event();
609  TLOG(TLVL_DEBUG + 33) << "End of RootDAQOutFile::writeOne";
610 }
611 
612 void RootDAQOutFile::writeSubRun(SubRunPrincipal const& sr)
613 {
614  std::lock_guard sentry{mutex_};
615  pSubRunAux_ = &sr.subRunAux();
616  pSubRunAux_->setRangeSetID(subRunRSID_);
617  fillBranches<InSubRun>(sr, pSubRunProductProvenanceVector_);
618  fileIndex_.addEntry(EventID::invalidEvent(pSubRunAux_->subRunID()),
619  fp_.subRunEntryNumber());
620  fp_.update_subRun(status_);
621 }
622 
623 void RootDAQOutFile::writeRun(RunPrincipal const& r)
624 {
625  std::lock_guard sentry{mutex_};
626  pRunAux_ = &r.runAux();
627  pRunAux_->setRangeSetID(runRSID_);
628  fillBranches<InRun>(r, pRunProductProvenanceVector_);
629  fileIndex_.addEntry(EventID::invalidEvent(pRunAux_->runID()),
630  fp_.runEntryNumber());
631  fp_.update_run(status_);
632 }
633 
// Write every (ParentageID, Parentage) pair from the global
// ParentageRegistry into the parentage tree, one tree entry per pair.
// The branch addresses are pointed at stack locals while filling and are
// detached again afterwards.
void RootDAQOutFile::writeParentageRegistry()
{
  std::lock_guard sentry{mutex_};
  // getObjectRequireDict also verifies that ROOT dictionaries exist for
  // the types being written.
  auto pid = root::getObjectRequireDict<ParentageID>();
  ParentageID const* hash = &pid;
  if (!parentageTree_->Branch(
        rootNames::parentageIDBranchName().c_str(), &hash, basketSize_, 0))
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to create a branch for ParentageIDs in the output file";
  }
  hash = nullptr;
  auto par = root::getObjectRequireDict<Parentage>();
  Parentage const* desc = &par;
  if (!parentageTree_->Branch(
        rootNames::parentageBranchName().c_str(), &desc, basketSize_, 0))
  {
    throw Exception(errors::FatalRootError) // NOLINT(cert-err60-cpp)
      << "Failed to create a branch for Parentages in the output file";
  }
  desc = nullptr;
  // Point both branches at each registry entry in turn and fill.
  for (auto const& pr : ParentageRegistry::get())
  {
    hash = &pr.first;
    desc = &pr.second;
    parentageTree_->Fill();
  }
  // Detach the branch addresses from the local pointers before they go
  // out of scope.
  parentageTree_->SetBranchAddress(rootNames::parentageIDBranchName().c_str(),
                                   nullptr);
  parentageTree_->SetBranchAddress(rootNames::parentageBranchName().c_str(),
                                   nullptr);
}
666 
667 void RootDAQOutFile::writeFileFormatVersion()
668 {
669  std::lock_guard sentry{mutex_};
670  FileFormatVersion const ver{getFileFormatVersion(), getFileFormatEra()};
671  auto const* pver = &ver;
672  TBranch* b = metaDataTree_->Branch(
673  metaBranchRootName<FileFormatVersion>(), &pver, basketSize_, 0);
674  // FIXME: Turn this into a throw!
675  assert(b);
676  b->Fill();
677 }
678 
679 void RootDAQOutFile::writeFileIndex()
680 {
681  std::lock_guard sentry{mutex_};
682  fileIndex_.sortBy_Run_SubRun_Event();
683  FileIndex::Element elem{};
684  auto const* findexElemPtr = &elem;
685  TBranch* b = fileIndexTree_->Branch(
686  metaBranchRootName<FileIndex::Element>(), &findexElemPtr, basketSize_, 0);
687  // FIXME: Turn this into a throw!
688  assert(b);
689  for (auto& entry : fileIndex_)
690  {
691  findexElemPtr = &entry;
692  b->Fill();
693  }
694  b->SetAddress(0);
695 }
696 
// Intentionally a no-op.
void RootDAQOutFile::writeProcessConfigurationRegistry()
{
  // We don't do this yet; currently we're storing a slightly
  // bloated ProcessHistoryRegistry.
}
702 
703 void RootDAQOutFile::writeProcessHistoryRegistry()
704 {
705  std::lock_guard sentry{mutex_};
706  ProcessHistoryMap pHistMap;
707  for (auto const& pr : ProcessHistoryRegistry::get())
708  {
709  pHistMap.emplace(pr);
710  }
711  auto const* p = &pHistMap;
712  TBranch* b = metaDataTree_->Branch(
713  metaBranchRootName<ProcessHistoryMap>(), &p, basketSize_, 0);
714  if (b == nullptr)
715  {
716  throw Exception(errors::LogicError) // NOLINT(cert-err60-cpp)
717  << "Unable to locate required "
718  "ProcessHistoryMap branch in output "
719  "metadata tree.\n";
720  }
721  b->Fill();
722 }
723 
// Fill the in-file SQLite "FileCatalog_metadata" (Name, Value) table from
// the job-level metadata `md`, the collected file statistics `stats`, and
// the stream-specific metadata `ssmd` (inserted last, so they can
// override earlier keys — presumably; depends on Ntuple insert semantics,
// TODO confirm against cet::sqlite::Ntuple).
void RootDAQOutFile::writeFileCatalogMetadata(
  FileStatsCollector const& stats,
  FileCatalogMetadata::collection_type const& md,
  FileCatalogMetadata::collection_type const& ssmd)
{
  std::lock_guard sentry{mutex_};
  using namespace cet::sqlite;
  Ntuple<string, string> fileCatalogMetadata{
    *rootFileDB_, "FileCatalog_metadata", {{"Name", "Value"}}, true};
  for (auto const& [key, value] : md)
  {
    fileCatalogMetadata.insert(key, value);
  }

  // Add our own specific information: File format and friends.
  fileCatalogMetadata.insert("file_format", "\"artroot\"");

  // File start time.
  namespace bpt = boost::posix_time;
  auto formatted_time = [](auto const& t) {
    return cet::canonical_string(bpt::to_iso_extended_string(t));
  };
  fileCatalogMetadata.insert("start_time",
                             formatted_time(stats.outputFileOpenTime()));
  // File "end" time: now, since file is not actually closed yet.
  fileCatalogMetadata.insert(
    "end_time",
    formatted_time(boost::posix_time::second_clock::universal_time()));
  // Run/subRun information.  The run type is taken from the last
  // "art.run_type" entry in md (reverse search) and applied to every
  // seen subrun.
  if (!stats.seenSubRuns().empty())
  {
    auto I = find_if(md.crbegin(), md.crend(), [](auto const& p) {
      return p.first == "art.run_type";
    });
    if (I != md.crend())
    {
      std::ostringstream buf;
      buf << "[ ";
      for (auto const& srid : stats.seenSubRuns())
      {
        buf << "[ " << srid.run() << ", " << srid.subRun() << ", "
            << cet::canonical_string(I->second) << " ], ";
      }
      // Rewind over last delimiter.
      buf.seekp(-2, std::ios_base::cur);
      buf << " ]";
      fileCatalogMetadata.insert("runs", buf.str());
    }
  }
  // Number of events.
  fileCatalogMetadata.insert("event_count",
                             std::to_string(stats.eventsThisFile()));
  fileCatalogMetadata.insert("first_event",
                             std::to_string(stats.lowestEventID().event()));
  fileCatalogMetadata.insert("last_event",
                             std::to_string(stats.highestEventID().event()));
  // File parents.
  if (!stats.parents().empty())
  {
    std::ostringstream pstring;
    pstring << "[ ";
    for (auto const& parent : stats.parents())
    {
      pstring << cet::canonical_string(parent) << ", ";
    }
    // Rewind over last delimiter.
    pstring.seekp(-2, std::ios_base::cur);
    pstring << " ]";
    fileCatalogMetadata.insert("parents", pstring.str());
  }

  // The following need to be encapsulated in an art table
  // first_event and last_event.
  auto eidToTuple = [](EventID const& eid) -> string {
    std::ostringstream eidStr;
    eidStr << "[ " << eid.run() << ", " << eid.subRun() << ", " << eid.event()
           << " ]";
    return eidStr.str();
  };
  fileCatalogMetadata.insert("art.first_event",
                             eidToTuple(stats.lowestEventID()));
  fileCatalogMetadata.insert("art.last_event",
                             eidToTuple(stats.highestEventID()));
  fileCatalogMetadata.insert("art.file_format_era",
                             cet::canonical_string(getFileFormatEra()));
  fileCatalogMetadata.insert("art.file_format_version",
                             std::to_string(getFileFormatVersion()));

  // Incoming stream-specific metadata overrides.
  for (auto const& [key, value] : ssmd)
  {
    fileCatalogMetadata.insert(key, value);
  }
}
818 
819 void RootDAQOutFile::writeParameterSetRegistry()
820 {
821  std::lock_guard sentry{mutex_};
822  fhicl::ParameterSetRegistry::exportTo(*rootFileDB_);
823 }
824 
825 void RootDAQOutFile::writeProductDescriptionRegistry()
826 {
827  std::lock_guard sentry{mutex_};
828  // Make a local copy of the UpdateOutputCallbacks's ProductList,
829  // removing any transient or pruned products.
830  ProductRegistry reg;
831  auto productDescriptionsToWrite = [this, &reg](BranchType const bt) {
832  for (auto const& desc :
833  descriptionsToPersist_[bt] | ranges::views::values)
834  {
835  reg.productList_.emplace(BranchKey{desc}, desc);
836  }
837  };
838  for_each_branch_type(productDescriptionsToWrite);
839  ProductRegistry const* regp = &reg;
840  TBranch* b = metaDataTree_->Branch(
841  metaBranchRootName<ProductRegistry>(), &regp, basketSize_, 0);
842  // FIXME: Turn this into a throw!
843  assert(b);
844  b->Fill();
845 }
846 
847 void RootDAQOutFile::writeProductDependencies()
848 {
849  std::lock_guard sentry{mutex_};
850  BranchChildren const* ppDeps = &om_->branchChildren();
851  TBranch* b = metaDataTree_->Branch(
852  metaBranchRootName<BranchChildren>(), &ppDeps, basketSize_, 0);
853  // FIXME: Turn this into a throw!
854  assert(b);
855  b->Fill();
856 }
857 
858 void RootDAQOutFile::writeResults(ResultsPrincipal& resp)
859 {
860  std::lock_guard sentry{mutex_};
861  pResultsAux_ = &resp.resultsAux();
862  fillBranches<InResults>(resp, pResultsProductProvenanceVector_);
863 }
864 
865 void RootDAQOutFile::writeTTrees()
866 {
867  TLOG(TLVL_DEBUG + 33) << "Start of RootDAQOutFile::writeTTrees";
868  std::lock_guard sentry{mutex_};
869  RootOutputTree::writeTTree(metaDataTree_);
870  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::writeTTrees after writing metaDataTree_";
871  RootOutputTree::writeTTree(fileIndexTree_);
872  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::writeTTrees after writing fileIndexTree_";
873  RootOutputTree::writeTTree(parentageTree_);
874  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::writeTTrees after writing parentageTree_";
875  for_each_branch_type(
876  [this](BranchType const bt) { treePointers_[bt]->writeTree(); });
877  TLOG(TLVL_DEBUG + 33) << "End of RootDAQOutFile::writeTTrees";
878 }
879 
880 void RootDAQOutFile::setSubRunAuxiliaryRangeSetID(RangeSet const& ranges)
881 {
882  std::lock_guard sentry{mutex_};
883  subRunRSID_ = getNewRangeSetID(*rootFileDB_, InSubRun, ranges.run());
884  insertIntoEventRanges(*rootFileDB_, ranges);
885  auto const& eventRangesIDs = getExistingRangeSetIDs(*rootFileDB_, ranges);
886  insertIntoJoinTable(*rootFileDB_, InSubRun, subRunRSID_, eventRangesIDs);
887 }
888 
889 void RootDAQOutFile::setRunAuxiliaryRangeSetID(RangeSet const& ranges)
890 {
891  std::lock_guard sentry{mutex_};
892  runRSID_ = getNewRangeSetID(*rootFileDB_, InRun, ranges.run());
893  insertIntoEventRanges(*rootFileDB_, ranges);
894  auto const& eventRangesIDs = getExistingRangeSetIDs(*rootFileDB_, ranges);
895  insertIntoJoinTable(*rootFileDB_, InRun, runRSID_, eventRangesIDs);
896 }
897 
898 template<BranchType BT>
899 EDProduct const*
900 RootDAQOutFile::getProduct(OutputHandle const& oh,
901  RangeSet const& prunedProductRS,
902  string const& wrappedName)
903 {
904  std::lock_guard sentry{mutex_};
905  if constexpr (detail::range_sets_supported(BT))
906  {
907  if (!prunedProductRS.is_valid())
908  {
909  return dummyProductCache_.product(wrappedName);
910  }
911  }
912  return oh.isValid() ? oh.wrapper() : dummyProductCache_.product(wrappedName);
913 }
914 
template<BranchType BT>
void RootDAQOutFile::fillBranches(Principal const& principal,
                                  vector<ProductProvenance>* vpp)
{
  // Populate the per-branch product pointers and the provenance vector
  // for branch type BT, then fill the corresponding TTree.
  // @param principal  source of products and provenance for this fill
  // @param vpp        out-param: receives the kept provenance entries for
  //                   the duration of the fillTree() call, then is cleared
  TLOG(TLVL_DEBUG + 33) << "Start of RootDAQOutFile::fillBranches";
  std::lock_guard sentry{mutex_};
  // Fast cloning only applies to the event tree; branches that were
  // already cloned wholesale must not be re-resolved below.
  bool const fastCloning{BT == InEvent && wasFastCloned_};
  map<unsigned, unsigned> checksumToIndex;
  auto const& principalRS = principal.seenRanges();

  // Local variables to avoid many function calls to
  // DropMetaData::operator==().
  bool const drop_no_metadata{dropMetaData_ == DropMetaData::DropNone};
  bool const drop_prior_metadata{dropMetaData_ == DropMetaData::DropPrior};
  bool const drop_all_metadata{dropMetaData_ == DropMetaData::DropAll};

  // Provenance entries accumulated during this fill; copied into *vpp
  // just before fillTree() and cleared afterwards.
  std::set<ProductProvenance> keptprv;
  for (auto const& [pid, val] : selectedOutputItemList_[BT])
  {
    auto const& bd = val.branchDescription;
    descriptionsToPersist_[BT].try_emplace(pid, bd);
    bool const produced = bd.produced();
    // Resolve the product unless its branch was already copied by fast
    // cloning (uncloned branches still need explicit filling).
    bool const resolveProd{produced || !fastCloning ||
                           treePointers_[BT]->uncloned(bd.branchName())};
    // Update the kept provenance
    bool const keepProvenance =
      drop_no_metadata || (produced && drop_prior_metadata);
    auto const& oh = principal.getForOutput(pid, resolveProd);
    auto prov = keptprv.begin();
    if (keepProvenance)
    {
      if (oh.productProvenance())
      {
        prov = keptprv.insert(*oh.productProvenance()).first;
        if (!drop_all_metadata && !dropMetaDataForDroppedData_)
        {
          {
            // Walk the parentage graph iteratively with an explicit
            // stack so that ancestors' provenance is also kept.
            vector<ProductProvenance const*> stacked_pp;
            stacked_pp.push_back(&*oh.productProvenance());
            while (not empty(stacked_pp))
            {
              auto current_pp = stacked_pp.back();
              stacked_pp.pop_back();
              for (auto const parent_bid :
                   current_pp->parentage().parents())
              {
                // Note: Suppose the parent ProductID corresponds to
                //       product that has been requested to be
                //       "dropped"--i.e. someone has specified "drop
                //       *_m1a_*_*" in their configuration, and
                //       although a given product matching this
                //       pattern will not be included in the
                //       selectedProducts_ list, one of the parents of
                //       a selected product can match the "dropping"
                //       pattern and its BranchDescription will still
                //       be written to disk since it is inserted into
                //       the descriptionsToPersist_ data member.
                auto parent_bd = principal.getProductDescription(parent_bid);
                if (!parent_bd)
                {
                  // FIXME: Is this an error condition?
                  continue;
                }
                descriptionsToPersist_[BT].try_emplace(parent_bid,
                                                       *parent_bd);
                if (!parent_bd->produced())
                {
                  // We got it from the input, nothing to do.
                  continue;
                }
                auto parent_pp =
                  principal.branchToProductProvenance(parent_bid);
                // Only keep ancestor provenance when nothing is being
                // dropped (DropNone) and the lookup succeeded.
                if (!parent_pp || !drop_no_metadata)
                {
                  continue;
                }
                if (!keptprv.insert(*parent_pp).second)
                {
                  // Already there, done.
                  continue;
                }
                if (!drop_all_metadata && !dropMetaDataForDroppedData_)
                {
                  // Recurse into this parent's own ancestry.
                  stacked_pp.push_back(parent_pp.get());
                }
              }
            }
          }
        }
      }
      else
      {
        // No provenance: product was either not produced, or was
        // dropped; create provenance to remember that.
        auto status = productstatus::dropped();
        if (produced)
        {
          status = productstatus::neverCreated();
        }
        prov = keptprv.emplace(pid, status).first;
      }
    }
    // Resolve the product if we are going to attempt to write it out.
    if (resolveProd)
    {
      // Product was either produced, or we are not cloning the whole
      // file and the product branch was not cloned so we should be
      // able to get a pointer to it from the passed principal and
      // write it out.
      auto const& rs = getRangeSet<BT>(oh, principalRS, produced);
      if (detail::range_sets_supported(BT) && !rs.is_valid())
      {
        // At this point we are now going to write out a dummy product
        // whose Wrapper present flag is false because the range set
        // got invalidated to prevent double counting when combining
        // run or subrun products from multiple fragments. We change
        // the provenance status that we are going to write out to
        // dummyToPreventDoubleCount to flag this case. Note that the
        // requirement is only that the status not be
        // productstatus::present(). We use a special code to make it
        // easier for humans to tell what is going on.
        // std::set elements are immutable, so "re-status" the entry by
        // erasing it and re-emplacing with the special status.
        auto prov_bid = prov->productID();
        if (keptprv.erase(*prov) != 1ull)
        {
          throw Exception(errors::LogicError, "KeptProvenance::setStatus")
            << "Attempt to set product status for product whose provenance "
               "is not being recorded.\n";
        }
        prov =
          keptprv
            .emplace(prov_bid, productstatus::dummyToPreventDoubleCount())
            .first;
      }
      auto const* product = getProduct<BT>(oh, rs, bd.wrappedName());
      setProductRangeSetID<BT>(
        rs, *rootFileDB_, const_cast<EDProduct*>(product), checksumToIndex);
      // NOTE(review): val is bound const&, so `product` is presumably a
      // mutable member of the output-item type — confirm in its header.
      val.product = product;
    }
  }
  // Hand the accumulated provenance to the caller's vector for the tree
  // fill; entries must all carry an initialized status.
  vpp->assign(keptprv.begin(), keptprv.end());
  for (auto const& val : *vpp)
  {
    if (val.productStatus() == productstatus::uninitialized())
    {
      throw Exception(errors::LogicError,
                      "RootDAQOutFile::fillBranches(principal, vpp):")
        << "Attempt to write a product with uninitialized provenance!\n";
    }
  }

  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::fillBranches before fillTree call";
  treePointers_[BT]->fillTree();
  TLOG(TLVL_DEBUG + 33) << "RootDAQOutFile::fillBranches after fillTree call";
  // The provenance vector is only needed while the tree is being filled.
  vpp->clear();
  TLOG(TLVL_DEBUG + 33) << "End of RootDAQOutFile::fillBranches";
}
1071 
1072 } // namespace art