1 #include "artdaq/DAQdata/Globals.hh"
2 #include "artdaq/DAQrate/EventStore.hh"
12 #include "cetlib/exception.h"
13 #include "artdaq-core/Core/StatisticsCollection.hh"
14 #include "artdaq-core/Core/SimpleQueueReader.hh"
15 #include "artdaq/DAQrate/detail/RequestMessage.hh"
16 #include "artdaq/Application/Routing/RoutingPacket.hh"
// Key for the Event Rate MonitoredQuantity.
23 const std::string EventStore::EVENT_RATE_STAT_KEY(
"EventStoreEventRate");
// Key for the Incomplete Events MonitoredQuantity.
24 const std::string EventStore::INCOMPLETE_EVENT_STAT_KEY(
"EventStoreIncompleteEvents");
// Primary constructor; the other constructors in this file delegate to it and
// then launch the reader thread themselves.
//
// pset                        FHiCL configuration; every key read below has a default.
// num_fragments_per_event     Number of Fragments that make an event complete.
// run                         Run identifier (its use is on lines not visible in this listing).
// event_queue_depth           Fallback for the "event_queue_depth" key.
// max_incomplete_event_count  Fallback for the "max_incomplete_events" key.
26 EventStore::EventStore(
const fhicl::ParameterSet& pset,
size_t num_fragments_per_event,
run_id_t run,
27 size_t event_queue_depth,
size_t max_incomplete_event_count)
28 : num_fragments_per_event_(num_fragments_per_event)
// The two size arguments only act as defaults; a value in pset wins.
29 , max_queue_size_(pset.get<size_t>(
"event_queue_depth", event_queue_depth))
30 , max_incomplete_count_(pset.get<size_t>(
"max_incomplete_events", max_incomplete_event_count))
34 , queue_(getGlobalQueue(max_queue_size_))
// Reference point used later when measuring how long the art thread took to become ready.
35 , reader_thread_launch_time_(std::chrono::steady_clock::now())
36 , send_requests_(pset.get<bool>(
"send_requests", false))
38 , request_port_(pset.get<int>(
"request_port", 3001))
// NOTE(review): the key is named in milliseconds, yet do_send_request_ wraps this
// member in std::chrono::microseconds and no ms->us conversion is visible here --
// confirm the intended unit.
39 , request_delay_(pset.get<size_t>(
"request_delay_ms", 10))
40 , multicast_out_addr_(pset.get<std::string>(
"output_address",
"localhost"))
42 , lastFlushedSeqID_(0)
43 , highestSeqIDSeen_(0)
// Enqueue timeout; the log messages elsewhere in this file report it in seconds.
44 , enq_timeout_(pset.get<double>(
"event_queue_wait_time", 5.0))
45 , enq_check_count_(pset.get<size_t>(
"event_queue_check_count", 5000))
46 , printSummaryStats_(pset.get<bool>(
"print_event_store_stats", false))
// Default of -1 disables the incomplete-event report: sendMetrics only emits it
// when this interval is > 0.
47 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
48 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
50 , art_thread_wait_ms_(pset.get<int>(
"art_thread_wait_ms", 4000))
52 TLOG_DEBUG(
"EventStore") <<
"EventStore CONSTRUCTOR" << TLOG_ENDL;
// Prepare the data-request socket, addressed to "request_address".
54 setup_requests_(pset.get<std::string>(
"request_address",
"227.128.12.26"));
// Routing-token settings live in their own nested parameter set; an empty set
// (all defaults, tokens disabled) is used when the key is absent.
56 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_token_config", fhicl::ParameterSet());
57 send_routing_tokens_ = rmConfig.get<
bool>(
"use_routing_master",
false);
58 token_port_ = rmConfig.get<
int>(
"routing_token_port", 35555);
59 token_address_ = rmConfig.get<std::string>(
"routing_master_hostname",
"localhost");
// NOTE(review): no assignment to reader_thread_ is visible in this constructor; the
// delegating constructors assign it after this body has run, so this message
// appears to be emitted before the thread actually exists -- confirm.
61 TRACE(12,
"artdaq::EventStore::EventStore ctor - reader_thread_ initialized");
65 size_t num_fragments_per_event,
69 ART_CMDLINE_FCN* reader)
70 :
EventStore(pset, num_fragments_per_event, run, 50, 50)
71 { reader_thread_ = (std::async(std::launch::async, reader, argc, argv)); }
74 size_t num_fragments_per_event,
76 const std::string& configString,
77 ART_CFGSTRING_FCN* reader)
78 :
EventStore(pset, num_fragments_per_event, run, 20, 20)
79 { reader_thread_ = (std::async(std::launch::async, reader, configString)); }
83 TLOG_DEBUG(
"EventStore") <<
"Shutting down EventStore" << TLOG_ENDL;
84 if (printSummaryStats_)
88 shutdown(request_socket_, 2);
89 close(request_socket_);
90 shutdown(token_socket_, 2);
95 bool printWarningWhenFragmentIsDropped)
99 assert(pfrag !=
nullptr);
100 assert(pfrag->fragmentID() != Fragment::InvalidFragmentID);
110 if (pfrag->sequenceID() > highestSeqIDSeen_)
112 highestSeqIDSeen_ = pfrag->sequenceID();
115 Fragment::timestamp_t timestamp = pfrag->timestamp();
120 std::lock_guard<std::mutex> lk(request_mutex_);
121 active_requests_[highestSeqIDSeen_] = timestamp;
125 Fragment::sequence_id_t sequence_id = ((pfrag->sequenceID() - (1 + lastFlushedSeqID_)) / seqIDModulus_) + 1;
126 TRACE(13,
"EventStore::insert seq=%lu fragID=%d id=%d lastFlushed=%lu seqIDMod=%d seq=%lu"
127 , pfrag->sequenceID(), pfrag->fragmentID(), my_rank, lastFlushedSeqID_, seqIDModulus_, sequence_id);
132 EventMap::iterator loc = events_.lower_bound(sequence_id);
134 if (loc == events_.end() || events_.key_comp()(sequence_id, loc->first))
138 RawEvent_ptr newevent(
new RawEvent(run_id_, subrun_id_, pfrag->sequenceID()));
140 events_.insert(loc, EventMap::value_type(sequence_id, newevent));
144 loc->second->insertFragment(std::move(pfrag));
145 if (loc->second->numFragments() == num_fragments_per_event_)
150 RawEvent_ptr complete_event(loc->second);
151 complete_event->markComplete();
157 std::lock_guard<std::mutex> lk(request_mutex_);
158 active_requests_.erase(sequence_id);
163 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
165 if (mqPtr.get() != 0)
167 mqPtr->addSample(complete_event->wordCount());
169 TRACE(14,
"EventStore::insert seq=%lu enqTimedWait start", sequence_id);
170 bool enqSuccess = queue_.enqTimedWait(complete_event, enq_timeout_);
171 TRACE(enqSuccess ? 14 : 0,
"EventStore::insert seq=%lu enqTimedWait complete", sequence_id);
175 if (printWarningWhenFragmentIsDropped)
177 TLOG_WARNING(
"EventStore") <<
"Enqueueing event " << sequence_id
178 <<
" FAILED, queue size = "
180 "; apparently no events were removed from this process's queue during the " << std::to_string(enq_timeout_.count())
181 <<
"-second timeout period" << TLOG_ENDL;
185 TLOG_DEBUG(
"EventStore") <<
"Enqueueing event " << sequence_id
186 <<
" FAILED, queue size = "
188 "; apparently no events were removed from this process's queue during the " << std::to_string(enq_timeout_.count())
189 <<
"-second timeout period" << TLOG_ENDL;
194 send_routing_token_(1);
197 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
199 if (mqPtr.get() != 0)
201 mqPtr->addSample(events_.size());
211 TRACE(12,
"EventStore: Testing if queue is full");
214 size_t sleepTime = 1000000 * (enq_timeout_.count() / enq_check_count_);
215 TRACE(12,
"EventStore: sleepTime is %lu.", sleepTime);
216 size_t loopCount = 0;
217 while (loopCount < enq_check_count_ && queue_.full())
224 rejectedFragment = std::move(pfrag);
228 TRACE(12,
"EventStore: Testing if there's room in the EventStore");
229 auto incomplete_full = events_.size() >= max_incomplete_count_;
232 EventMap::iterator loc = events_.lower_bound(pfrag->sequenceID());
234 if (loc == events_.end() || events_.key_comp()(pfrag->sequenceID(), loc->first))
236 rejectedFragment = std::move(pfrag);
241 TRACE(12,
"EventStore: Performing insert");
249 TLOG_DEBUG(
"EventStore") <<
"EventStore::endOfData" << TLOG_ENDL;
250 RawEvent_ptr end_of_data(
nullptr);
251 TRACE(4,
"EventStore::endOfData: Enqueuing end_of_data event");
252 bool enqSuccess = queue_.enqTimedWait(end_of_data, enq_timeout_);
257 TRACE(4,
"EventStore::endOfData: Getting return code from art thread");
258 readerReturnValue = reader_thread_.get();
264 seqIDModulus_ = seqIDModulus;
270 size_t initialStoreSize = events_.size();
271 TLOG_DEBUG(
"EventStore") <<
"Flushing " << initialStoreSize
272 <<
" stale events from the EventStore." << TLOG_ENDL;
273 EventMap::iterator loc;
274 std::vector<sequence_id_t> flushList;
275 for (loc = events_.begin(); loc != events_.end(); ++loc)
277 RawEvent_ptr complete_event(loc->second);
278 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
280 if (mqPtr.get() != 0)
282 mqPtr->addSample(complete_event->wordCount());
284 enqSuccess = queue_.enqTimedWait(complete_event, enq_timeout_);
291 flushList.push_back(loc->first);
294 for (
size_t idx = 0; idx < flushList.size(); ++idx)
296 events_.erase(flushList[idx]);
298 TLOG_DEBUG(
"EventStore") <<
"Done flushing " << flushList.size()
299 <<
" stale events from the EventStore." << TLOG_ENDL;
301 lastFlushedSeqID_ = highestSeqIDSeen_;
302 return (flushList.size() >= initialStoreSize);
307 if (!queue_.queueReaderIsReady())
309 TLOG_WARNING(
"EventStore") <<
"Run start requested, but the art thread is not yet ready, waiting up to " << art_thread_wait_ms_ <<
" msec..." << TLOG_ENDL;
310 while (!queue_.queueReaderIsReady() && std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - reader_thread_launch_time_).count() < art_thread_wait_ms_)
314 if (queue_.queueReaderIsReady())
316 auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(queue_.getReadyTime() - reader_thread_launch_time_).count();
317 TLOG_INFO(
"EventStore") <<
"art initialization took (roughly) " << std::setw(4) << std::to_string(dur) <<
" ms." << TLOG_ENDL;
321 auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - reader_thread_launch_time_).count();
322 TLOG_ERROR(
"EventStore") <<
"art thread still not ready after " << dur <<
" ms. Continuing to start..." << TLOG_ENDL;
327 lastFlushedSeqID_ = 0;
328 highestSeqIDSeen_ = 0;
329 send_routing_token_(max_queue_size_);
330 TLOG_DEBUG(
"EventStore") <<
"Starting run " << run_id_
331 <<
", max queue size = "
333 <<
", queue capacity = "
336 << queue_.size() << TLOG_ENDL;
339 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
340 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1,
false);
349 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
350 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1,
false);
356 RawEvent_ptr endOfRunEvent(
new RawEvent(run_id_, subrun_id_, 0));
357 std::unique_ptr<artdaq::Fragment>
359 Fragment(static_cast<size_t>
360 (ceil(
sizeof(my_rank) /
361 static_cast<double>(
sizeof(Fragment::value_type))))));
363 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
364 *endOfRunFrag->dataBegin() = my_rank;
365 endOfRunEvent->insertFragment(std::move(endOfRunFrag));
367 return queue_.enqTimedWait(endOfRunEvent, enq_timeout_);
372 RawEvent_ptr endOfSubrunEvent(
new RawEvent(run_id_, subrun_id_, 0));
373 std::unique_ptr<artdaq::Fragment>
375 Fragment(static_cast<size_t>
376 (ceil(
sizeof(my_rank) /
377 static_cast<double>(
sizeof(Fragment::value_type))))));
379 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
380 *endOfSubrunFrag->dataBegin() = my_rank;
381 endOfSubrunEvent->insertFragment(std::move(endOfSubrunFrag));
383 return queue_.enqTimedWait(endOfSubrunEvent, enq_timeout_);
// Ensures two MonitoredQuantity objects exist in the StatisticsCollection singleton.
// Each is looked up first and only created (constructed with 3.0 and 300.0) when the
// lookup returns a null pointer. The lookup keys are on continuation lines missing
// from this listing; presumably EVENT_RATE_STAT_KEY and INCOMPLETE_EVENT_STAT_KEY --
// TODO confirm.
387 EventStore::initStatistics_()
// First quantity: look up, create if absent.
389 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
391 if (mqPtr.get() == 0)
393 mqPtr.reset(
new MonitoredQuantity(3.0, 300.0));
394 StatisticsCollection::getInstance().
// Second quantity: same look-up-or-create sequence, reusing mqPtr.
399 mqPtr = StatisticsCollection::getInstance().
401 if (mqPtr.get() == 0)
403 mqPtr.reset(
new MonitoredQuantity(3.0, 300.0));
404 StatisticsCollection::getInstance().
// Writes two plain-text statistics summaries through ofstream, one per
// MonitoredQuantity fetched from the StatisticsCollection singleton. The first
// report is labelled in "events", the second in "fragments" / incomplete event
// counts. Each report has an overall summary followed by one line per recent bin.
// Both file names end in "_<my_rank, zero-padded to width 4>.txt"; the leading part
// of each name is built on lines missing from this listing.
411 EventStore::reportStatistics_()
// ---- Report 1: event rate ----
413 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
415 if (mqPtr.get() != 0)
419 <<
"_" << setfill(
'0') << setw(4) << my_rank <<
".txt";
420 std::string filename = oss.str();
421 ofstream outStream(filename.c_str());
// Wait (argument 3.0) for pending samples to be folded in before reading the stats.
422 mqPtr->waitUntilAccumulatorsHaveBeenFlushed(3.0);
423 artdaq::MonitoredQuantityStats stats;
424 mqPtr->getStats(stats);
// Overall summary. Sample values are scaled by sizeof(RawDataType) and divided by
// 1024*1024 so the rate is printed under an "MB/sec" label.
425 outStream <<
"EventStore rank " << my_rank <<
": events processed = "
426 << stats.fullSampleCount <<
" at " << stats.fullSampleRate
427 <<
" events/sec, data rate = "
428 << (stats.fullValueRate *
sizeof(RawDataType)
429 / 1024.0 / 1024.0) <<
" MB/sec, duration = "
430 << stats.fullDuration <<
" sec" << std::endl
431 <<
" minimum event size = "
432 << (stats.fullValueMin *
sizeof(RawDataType)
434 <<
" MB, maximum event size = "
435 << (stats.fullValueMax *
sizeof(RawDataType)
437 <<
" MB" << std::endl;
// Per-bin lines. foundTheStart latches to true at the first bin whose duration is
// positive.
438 bool foundTheStart =
false;
439 for (
int idx = 0; idx < (int)stats.recentBinnedDurations.size(); ++idx)
441 if (stats.recentBinnedDurations[idx] > 0.0)
443 foundTheStart =
true;
// NOTE(review): the condition guarding this output is not visible in this listing;
// the expressions below divide by recentBinnedDurations[idx], so confirm that a
// zero-duration bin after the start cannot reach here.
447 outStream <<
" " << std::fixed << std::setprecision(3)
448 << stats.recentBinnedEndTimes[idx]
449 <<
": " << stats.recentBinnedSampleCounts[idx]
451 << (stats.recentBinnedSampleCounts[idx] /
452 stats.recentBinnedDurations[idx])
453 <<
" events/sec, data rate = "
454 << (stats.recentBinnedValueSums[idx] *
455 sizeof(RawDataType) / 1024.0 / 1024.0 /
456 stats.recentBinnedDurations[idx])
457 <<
" MB/sec, bin size = "
458 << stats.recentBinnedDurations[idx]
459 <<
" sec" << std::endl;
// ---- Report 2: incomplete events ----
465 mqPtr = StatisticsCollection::getInstance().
467 if (mqPtr.get() != 0)
// This file name additionally embeds run_id_ (width 4) ahead of the rank.
471 << setw(4) << run_id_
472 <<
"_" << setfill(
'0') << setw(4) << my_rank <<
".txt";
473 std::string filename = oss.str();
474 ofstream outStream(filename.c_str());
475 mqPtr->waitUntilAccumulatorsHaveBeenFlushed(3.0);
476 artdaq::MonitoredQuantityStats stats;
477 mqPtr->getStats(stats);
// Overall summary: the sample count/rate are reported as fragments, and the sample
// values as incomplete event counts (average, minimum, maximum).
478 outStream <<
"EventStore rank " << my_rank <<
": fragments processed = "
479 << stats.fullSampleCount <<
" at " << stats.fullSampleRate
480 <<
" fragments/sec, average incomplete event count = "
481 << stats.fullValueAverage <<
" duration = "
482 << stats.fullDuration <<
" sec" << std::endl
483 <<
" minimum incomplete event count = "
484 << stats.fullValueMin <<
", maximum incomplete event count = "
485 << stats.fullValueMax << std::endl;
486 bool foundTheStart =
false;
487 for (
int idx = 0; idx < (int)stats.recentBinnedDurations.size(); ++idx)
489 if (stats.recentBinnedDurations[idx] > 0.0)
491 foundTheStart =
true;
// Only bins with at least one sample are printed, which also protects the
// division by recentBinnedSampleCounts[idx] below.
493 if (foundTheStart && stats.recentBinnedSampleCounts[idx] > 0.0)
495 outStream <<
" " << std::fixed << std::setprecision(3)
496 << stats.recentBinnedEndTimes[idx]
497 <<
": " << stats.recentBinnedSampleCounts[idx]
499 << (stats.recentBinnedSampleCounts[idx] /
500 stats.recentBinnedDurations[idx])
501 <<
" fragments/sec, average incomplete event count = "
502 << (stats.recentBinnedValueSums[idx] /
503 stats.recentBinnedSampleCounts[idx])
505 << stats.recentBinnedDurations[idx]
506 <<
" sec" << std::endl;
// Closing line: number of events currently held in events_.
509 outStream <<
"Incomplete count now = " << events_.size() << std::endl;
// Creates the UDP socket used for sending data requests and resolves the
// destination address.
//
// request_address  Host/address string resolved (together with request_port_) into
//                  request_addr_.
//
// Each failure below is logged with TLOG_ERROR; what happens after the log call is
// on lines missing from this listing.
515 EventStore::setup_requests_(std::string request_address)
519 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
// NOTE(review): socket() reports failure by returning -1, and 0 is a valid
// descriptor, so this test does not fire on failure (and would fire for
// descriptor 0). `request_socket_ < 0` looks like the intended check.
520 if (!request_socket_)
522 TLOG_ERROR(
"EventStore") <<
"I failed to create the socket for sending Data Requests!" << TLOG_ENDL;
525 int sts =
ResolveHost(request_address.c_str(), request_port_, request_addr_);
528 TLOG_ERROR(
"EventStore") <<
"Unable to resolve Data Request address" << TLOG_ENDL;
// A non-default "output_address" is resolved so it can be used as the outgoing
// multicast interface.
532 if (multicast_out_addr_ !=
"localhost") {
// NOTE(review): this declares a second `sts` inside the nested block, apparently
// shadowing the one declared above -- harmless but easy to misread.
534 int sts =
ResolveHost(multicast_out_addr_.c_str(), addr);
537 TLOG_ERROR(
"EventStore") <<
"Unable to resolve multicast interface address" << TLOG_ENDL;
542 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
544 TLOG_ERROR(
"EventStore") <<
"Unable to enable port reuse on request socket" << TLOG_ENDL;
// Send requests out through the interface resolved from multicast_out_addr_.
547 if (setsockopt(request_socket_, IPPROTO_IP, IP_MULTICAST_IF, &addr,
sizeof(addr)) == -1)
549 TLOG_ERROR(
"EventStore") <<
"Cannot set outgoing interface." << TLOG_ENDL;
554 if (setsockopt(request_socket_, SOL_SOCKET, SO_BROADCAST, (
void*)&yes,
sizeof(
int)) == -1)
556 TLOG_ERROR(
"EventStore") <<
"Cannot set request socket to broadcast." << TLOG_ENDL;
// Opens the connection used for routing tokens. Does nothing unless
// send_routing_tokens_ is set; otherwise stores the result of
// TCPConnect(token_address_, token_port_) in token_socket_.
563 EventStore::setup_tokens_()
565 if (send_routing_tokens_)
567 TLOG_DEBUG(
"EventStore") <<
"Creating Routing Token sending socket" << TLOG_ENDL;
568 token_socket_ =
TCPConnect(token_address_.c_str(), token_port_);
// The condition that selects this error path is on a line missing from this
// listing; presumably it tests token_socket_ for a failure value -- TODO confirm.
571 TLOG_ERROR(
"EventStore") <<
"I failed to create the socket for sending Routing Tokens!" << TLOG_ENDL;
// Builds a RequestMessage from every entry in active_requests_ and sends it to
// request_addr_ over request_socket_, as a header followed by the request buffer.
// Send failures are logged with TLOG_ERROR.
577 void EventStore::do_send_request_()
// Delay before sending. NOTE(review): request_delay_ is interpreted as microseconds
// here, while the constructor reads it from a key named "request_delay_ms" --
// confirm the intended unit.
579 std::this_thread::sleep_for(std::chrono::microseconds(request_delay_));
581 detail::RequestMessage message;
// active_requests_ is read while request_mutex_ is held; the extent of the locked
// scope depends on braces missing from this listing.
583 std::lock_guard<std::mutex> lk(request_mutex_);
584 for (
auto& req : active_requests_)
586 message.addRequest(req.first, req.second);
// Textual form of the destination address, used only for the log line below.
// The return value of inet_ntop is not checked.
589 char str[INET_ADDRSTRLEN];
590 inet_ntop(AF_INET, &(request_addr_.sin_addr), str, INET_ADDRSTRLEN);
591 TLOG_DEBUG(
"EventStore") <<
"Sending request for " << std::to_string(message.size()) <<
" events to multicast group " << str << TLOG_ENDL;
// Two separate sendto calls: first the fixed-size header ...
592 if (sendto(request_socket_, message.header(),
sizeof(detail::RequestHeader), 0, (
struct sockaddr *)&request_addr_,
sizeof(request_addr_)) < 0)
594 TLOG_ERROR(
"EventStore") <<
"Error sending request message header" << TLOG_ENDL;
// ... then message.size() RequestPacket entries.
596 if (sendto(request_socket_, message.buffer(),
sizeof(detail::RequestPacket) * message.size(), 0, (
struct sockaddr *)&request_addr_,
sizeof(request_addr_)) < 0)
598 TLOG_ERROR(
"EventStore") <<
"Error sending request message data" << TLOG_ENDL;
// Sends one RoutingToken over token_socket_.
//
// nSlots  Value stored in the token's new_slots_free field; it is forwarded
//         without validation.
//
// Returns immediately when send_routing_tokens_ is false.
602 void EventStore::send_routing_token_(
int nSlots)
604 TLOG_DEBUG(
"EventStore") <<
"send_routing_token_ called, send_routing_tokens_=" << std::boolalpha << send_routing_tokens_ << TLOG_ENDL;
605 if (!send_routing_tokens_)
return;
// Connect lazily: -1 is treated as "no socket yet".
606 if (token_socket_ == -1) setup_tokens_();
607 detail::RoutingToken token;
608 token.header = TOKEN_MAGIC;
609 token.rank = my_rank;
610 token.new_slots_free = nSlots;
612 TLOG_DEBUG(
"EventStore") <<
"Sending RoutingToken to " << token_address_ <<
":" << token_port_ << TLOG_ENDL;
// Keep calling send() from offset `sts` until the whole struct has gone out, so
// partial writes are handled. The declaration of `sts` and the handling of `res`
// (including send() errors) are on lines missing from this listing.
614 while (sts <
sizeof(detail::RoutingToken)) {
615 auto res = send(token_socket_, reinterpret_cast<uint8_t*>(&token) + sts,
sizeof(detail::RoutingToken) - sts, 0);
622 TLOG_DEBUG(
"EventStore") <<
"Done sending RoutingToken to " << token_address_ <<
":" << token_port_ << TLOG_ENDL;
// Runs do_send_request_() on a newly created std::thread so the caller does not
// wait for the request delay and the socket writes.
626 EventStore::send_request_()
// NOTE(review): inside a member function `[=]` captures `this` by pointer, so the
// thread must not outlive this EventStore. What is done with `request` afterwards
// (join/detach) is on a line missing from this listing -- confirm.
628 std::thread request([=] { do_send_request_(); });
637 metricMan->sendMetric(
"Incomplete Event Count", events_.size(),
640 if (incomplete_event_report_interval_ms_ > 0 && events_.size())
642 if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_incomplete_event_report_time_).count() < incomplete_event_report_interval_ms_)
return;
643 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
644 std::ostringstream oss;
645 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
646 for (
auto& ev : events_)
648 oss << ev.first <<
" (" << ev.second->numFragments() <<
"), ";
650 TLOG_DEBUG(
"EventStore") << oss.str() << TLOG_ENDL;
void insert(FragmentPtr pfrag, bool printWarningWhenFragmentIsDropped=true)
Give ownership of the Fragment to the EventStore.
static const std::string EVENT_RATE_STAT_KEY
Key for the Event Rate MonitoredQuantity.
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
int TCPConnect(char const *host_in, int dflt_port, long flags=0, int sndbufsiz=0)
Connect to a host on a given port.
bool endOfData(int &readerReturnValue)
Indicate that the end of input has been reached to the art thread.
EventStoreInsertResult
This enumeration contains possible status codes of insertion attempts.
void startRun(run_id_t runID)
Start a Run.
EventStore()=delete
Default Constructor is deleted.
virtual ~EventStore()
EventStore Destructor.
The Fragment was successfully inserted.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
The EventStore class collects Fragment objects until it has received a complete event, at which point the event is handed over to the art thread.
static const std::string INCOMPLETE_EVENT_STAT_KEY
Key for the Incomplete Events MonitoredQuantity.
The EventStore is full, but the Fragment was accepted as it is for an already-open event...
void setSeqIDModulus(unsigned int seqIDModulus)
Set the parameter that will be used to determine which sequence IDs get grouped together into events...
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
bool flushData()
Push any incomplete events onto the queue.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
The EventStore is full, and the Fragment was rejected.
The Fragment was rejected, because the RawEventQueue is full.
bool endRun()
Send an EndOfRunFragment to the art thread.
bool endSubrun()
Send an EndOfSubrunFragment to the art thread.