1 #include "artdaq/DAQdata/Globals.hh"
2 #include "artdaq/DAQrate/EventStore.hh"
12 #include "cetlib/exception.h"
13 #include "artdaq-core/Core/StatisticsCollection.hh"
14 #include "artdaq-core/Core/SimpleQueueReader.hh"
15 #include "artdaq/Application/Routing/RoutingPacket.hh"
// Key for the Event Rate MonitoredQuantity.
22 const std::string EventStore::EVENT_RATE_STAT_KEY(
"EventStoreEventRate");
// Key for the Incomplete Events MonitoredQuantity.
23 const std::string EventStore::INCOMPLETE_EVENT_STAT_KEY(
"EventStoreIncompleteEvents");
// Main constructor; the other two constructor overloads delegate to it.
// Configuration is read from the FHiCL ParameterSet. The second argument of
// each pset.get<>() call is the value passed as the default for that key.
// After the initializer list it calls setup_requests_() and reads the routing
// token settings from the nested "routing_token_config" table.
25 EventStore::EventStore(
const fhicl::ParameterSet& pset,
size_t num_fragments_per_event,
run_id_t run,
26 size_t event_queue_depth,
size_t max_incomplete_event_count)
27 : num_fragments_per_event_(num_fragments_per_event)
// event_queue_depth and max_incomplete_event_count are only used as the
// defaults for the "event_queue_depth" and "max_incomplete_events" keys.
28 , max_queue_size_(pset.get<size_t>(
"event_queue_depth", event_queue_depth))
29 , max_incomplete_count_(pset.get<size_t>(
"max_incomplete_events", max_incomplete_event_count))
33 , queue_(getGlobalQueue(max_queue_size_))
// Reference point for the art-thread readiness timeout used in startRun.
34 , reader_thread_launch_time_(std::chrono::steady_clock::now())
35 , send_requests_(pset.get<bool>(
"send_requests", false))
37 , request_port_(pset.get<int>(
"request_port", 3001))
// NOTE(review): the key is named request_delay_ms, but do_send_request_
// passes request_delay_ to std::chrono::microseconds -- confirm the units.
38 , request_delay_(pset.get<size_t>(
"request_delay_ms", 10))
// "localhost" is treated by setup_requests_ as "no explicit outgoing
// multicast interface".
39 , multicast_out_addr_(pset.get<std::string>(
"output_address",
"localhost"))
40 , request_mode_(detail::RequestMessageMode::Normal)
42 , lastFlushedSeqID_(0)
43 , highestSeqIDSeen_(0)
// Passed to queue_.enqTimedWait(); log messages in insert() describe it as
// a number of seconds.
44 , enq_timeout_(pset.get<double>(
"event_queue_wait_time", 5.0))
45 , enq_check_count_(pset.get<size_t>(
"event_queue_check_count", 5000))
46 , printSummaryStats_(pset.get<bool>(
"print_event_store_stats", false))
// Default of -1 disables the incomplete-event report: sendMetrics only
// produces it when this value is > 0.
47 , incomplete_event_report_interval_ms_(pset.get<int>(
"incomplete_event_report_interval_ms", -1))
48 , last_incomplete_event_report_time_(std::chrono::steady_clock::now())
// Upper bound (ms, measured from reader_thread_launch_time_) that startRun
// waits for the queue reader to become ready.
50 , art_thread_wait_ms_(pset.get<int>(
"art_thread_wait_ms", 4000))
52 TLOG_DEBUG(
"EventStore") <<
"EventStore CONSTRUCTOR" << TLOG_ENDL;
// Prepare the data-request socket for the configured request address.
54 setup_requests_(pset.get<std::string>(
"request_address",
"227.128.12.26"));
// Routing-token settings live in their own table; an empty ParameterSet is
// the default, so each key below falls back to the default given here.
56 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_token_config", fhicl::ParameterSet());
57 send_routing_tokens_ = rmConfig.get<
bool>(
"use_routing_master",
false);
58 token_port_ = rmConfig.get<
int>(
"routing_token_port", 35555);
59 token_address_ = rmConfig.get<std::string>(
"routing_master_hostname",
"localhost");
61 TRACE(12,
"artdaq::EventStore::EventStore ctor - reader_thread_ initialized");
// Tail of the constructor overload that takes an ART_CMDLINE_FCN reader (the
// start of its parameter list is not visible in this listing). It delegates
// to the main constructor with 50 as the default for both the queue depth and
// the maximum incomplete-event count, then launches reader on its own thread
// (std::launch::async) with argc/argv and stores the future in reader_thread_.
65 size_t num_fragments_per_event,
69 ART_CMDLINE_FCN* reader)
70 :
EventStore(pset, num_fragments_per_event, run, 50, 50)
72 reader_thread_ = (std::async(std::launch::async, reader, argc, argv));
// Tail of the constructor overload that takes an ART_CFGSTRING_FCN reader
// (the start of its parameter list is not visible in this listing). It
// delegates to the main constructor with 20 as the default for both the queue
// depth and the maximum incomplete-event count, then launches reader on its
// own thread (std::launch::async) with configString and stores the future in
// reader_thread_.
76 size_t num_fragments_per_event,
78 const std::string& configString,
79 ART_CFGSTRING_FCN* reader)
80 :
EventStore(pset, num_fragments_per_event, run, 20, 20)
82 reader_thread_ = (std::async(std::launch::async, reader, configString));
// Shutdown path (presumably the destructor body; its signature is not visible
// in this listing). Logs, optionally reports summary statistics, and tears
// down the request and token sockets.
87 TLOG_DEBUG(
"EventStore") <<
"Shutting down EventStore" << TLOG_ENDL;
// The statement guarded by this flag is not visible in this listing.
88 if (printSummaryStats_)
// The second argument of shutdown() is the "how" value; 2 is SHUT_RDWR on
// Linux -- TODO confirm for other platforms.
92 shutdown(request_socket_, 2);
93 close(request_socket_);
// Only the shutdown of the token socket is visible here; any matching close
// call is not shown.
94 shutdown(token_socket_, 2);
// insert(FragmentPtr pfrag, bool printWarningWhenFragmentIsDropped): takes
// ownership of pfrag, files it under the event it belongs to, and hands the
// event to the queue once it holds num_fragments_per_event_ fragments.
// Several lines of this function (braces and some conditions) are missing
// from this listing, so the exact nesting of the statements below is not shown.
99 bool printWarningWhenFragmentIsDropped)
// Preconditions (debug builds only): a non-null fragment with a valid ID.
103 assert(pfrag !=
nullptr);
104 assert(pfrag->fragmentID() != Fragment::InvalidFragmentID);
// Track the largest raw sequence ID received so far; flushData later copies
// it into lastFlushedSeqID_.
114 if (pfrag->sequenceID() > highestSeqIDSeen_)
116 highestSeqIDSeen_ = pfrag->sequenceID();
119 Fragment::timestamp_t timestamp = pfrag->timestamp();
// Register a request for the highest sequence ID seen, keyed to this
// fragment's timestamp. active_requests_ is guarded by request_mutex_.
124 std::lock_guard<std::mutex> lk(request_mutex_);
125 active_requests_[highestSeqIDSeen_] = timestamp;
// End-of-run request mode is handled separately; the body of this branch is
// not visible in this listing.
132 if (send_requests_ && request_mode_ == detail::RequestMessageMode::EndOfRun)
134 std::lock_guard<std::mutex> lk(request_mutex_);
// Map the raw sequence ID onto the key used in events_: IDs following
// lastFlushedSeqID_ are grouped in runs of seqIDModulus_, numbered from 1.
// NOTE(review): if sequence_id_t is unsigned, a sequenceID() that is not
// greater than lastFlushedSeqID_ wraps around, and seqIDModulus_ == 0 divides
// by zero -- confirm neither can happen.
137 Fragment::sequence_id_t sequence_id = ((pfrag->sequenceID() - (1 + lastFlushedSeqID_)) / seqIDModulus_) + 1;
138 TRACE(13,
"EventStore::insert seq=%lu fragID=%d id=%d lastFlushed=%lu seqIDMod=%d seq=%lu"
139 , pfrag->sequenceID(), pfrag->fragmentID(), my_rank, lastFlushedSeqID_, seqIDModulus_, sequence_id);
// lower_bound yields either the existing entry for this key or the position
// where it would be inserted.
144 EventMap::iterator loc = events_.lower_bound(sequence_id);
// True when no event with this key exists yet.
146 if (loc == events_.end() || events_.key_comp()(sequence_id, loc->first))
// The RawEvent itself carries the raw sequence ID, not the grouped key.
150 RawEvent_ptr newevent(
new RawEvent(run_id_, subrun_id_, pfrag->sequenceID()));
// loc is passed as the insertion hint.
152 events_.insert(loc, EventMap::value_type(sequence_id, newevent));
// Ownership of the fragment moves into the event; pfrag must not be
// dereferenced after this line.
156 loc->second->insertFragment(std::move(pfrag));
// The event is complete once it holds the expected number of fragments.
157 if (loc->second->numFragments() == num_fragments_per_event_)
162 RawEvent_ptr complete_event(loc->second);
163 complete_event->markComplete();
// A completed event no longer needs a data request.
169 std::lock_guard<std::mutex> lk(request_mutex_);
170 active_requests_.erase(sequence_id);
// Record the event size (in words) in a MonitoredQuantity; the lookup key is
// on a line that is not visible in this listing.
175 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
177 if (mqPtr.get() != 0)
179 mqPtr->addSample(complete_event->wordCount());
181 TRACE(14,
"EventStore::insert seq=%lu enqTimedWait start", sequence_id);
// Hand the event to the queue, waiting at most enq_timeout_ for room.
182 bool enqSuccess = queue_.enqTimedWait(complete_event, enq_timeout_);
// A failed enqueue is traced at level 0 instead of level 14.
183 TRACE(enqSuccess ? 14 : 0,
"EventStore::insert seq=%lu enqTimedWait complete", sequence_id);
// The same failure message is emitted as a warning or as a debug message,
// depending on the caller's flag.
187 if (printWarningWhenFragmentIsDropped)
189 TLOG_WARNING(
"EventStore") <<
"Enqueueing event " << sequence_id
190 <<
" FAILED, queue size = "
192 "; apparently no events were removed from this process's queue during the " << std::to_string(enq_timeout_.count())
193 <<
"-second timeout period" << TLOG_ENDL;
197 TLOG_DEBUG(
"EventStore") <<
"Enqueueing event " << sequence_id
198 <<
" FAILED, queue size = "
200 "; apparently no events were removed from this process's queue during the " << std::to_string(enq_timeout_.count())
201 <<
"-second timeout period" << TLOG_ENDL;
// Send a routing token reporting one new free slot.
206 send_routing_token_(1);
// Sample the current number of events held in events_; the lookup key is on
// a line that is not visible in this listing.
209 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
211 if (mqPtr.get() != 0)
213 mqPtr->addSample(events_.size());
// Body of the insert overload that can hand the fragment back to the caller
// through rejectedFragment (its signature, return statements and several
// conditions are not visible in this listing). It checks for room in the
// queue and in the incomplete-event store before performing the insert.
223 TRACE(12,
"EventStore: Testing if queue is full");
// Split the enqueue timeout into enq_check_count_ polling intervals; the
// 1000000 factor suggests microseconds given a timeout counted in seconds
// -- TODO confirm against the sleep call, which is not visible here.
226 size_t sleepTime = 1000000 * (enq_timeout_.count() / enq_check_count_);
227 TRACE(12,
"EventStore: sleepTime is %lu.", sleepTime);
228 size_t loopCount = 0;
// Poll while the queue stays full, at most enq_check_count_ times.
229 while (loopCount < enq_check_count_ && queue_.full())
// Give the fragment back to the caller (the guarding condition is not
// visible in this listing).
236 rejectedFragment = std::move(pfrag);
240 TRACE(12,
"EventStore: Testing if there's room in the EventStore");
241 auto incomplete_full = events_.size() >= max_incomplete_count_;
// Look for an already-open event under this fragment's sequence ID.
244 EventMap::iterator loc = events_.lower_bound(pfrag->sequenceID());
// True when no event with this sequence ID is open; the fragment is then
// handed back to the caller.
246 if (loc == events_.end() || events_.key_comp()(pfrag->sequenceID(), loc->first))
248 rejectedFragment = std::move(pfrag);
253 TRACE(12,
"EventStore: Performing insert");
// endOfData(int& readerReturnValue): signals end of input to the art thread
// by enqueueing a null RawEvent_ptr, then collects the reader's return code.
261 TLOG_DEBUG(
"EventStore") <<
"EventStore::endOfData" << TLOG_ENDL;
// A null event pointer is what gets enqueued as the end-of-data marker.
262 RawEvent_ptr end_of_data(
nullptr);
263 TRACE(4,
"EventStore::endOfData: Enqueuing end_of_data event");
264 bool enqSuccess = queue_.enqTimedWait(end_of_data, enq_timeout_);
269 TRACE(4,
"EventStore::endOfData: Getting return code from art thread");
// future::get() blocks until the reader launched in the constructor returns.
270 readerReturnValue = reader_thread_.get();
// Store the modulus that insert() uses to group raw sequence IDs into event
// keys. NOTE(review): insert() divides by this value; no zero check is
// visible here.
276 seqIDModulus_ = seqIDModulus;
// flushData(): pushes the events still held in events_ onto the queue as they
// are, removes the ones recorded in flushList, and reports whether the number
// flushed covers everything that was in the store at entry.
282 size_t initialStoreSize = events_.size();
283 TLOG_DEBUG(
"EventStore") <<
"Flushing " << initialStoreSize
284 <<
" stale events from the EventStore." << TLOG_ENDL;
285 EventMap::iterator loc;
// Keys are collected here and erased after the loop, so events_ is not
// modified while it is being iterated.
286 std::vector<sequence_id_t> flushList;
287 for (loc = events_.begin(); loc != events_.end(); ++loc)
289 RawEvent_ptr complete_event(loc->second);
// Record the event size in a MonitoredQuantity; the lookup key is on a line
// that is not visible in this listing.
290 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
292 if (mqPtr.get() != 0)
294 mqPtr->addSample(complete_event->wordCount());
296 enqSuccess = queue_.enqTimedWait(complete_event, enq_timeout_);
// Presumably only reached when the enqueue succeeded (the guarding condition
// is not visible in this listing).
303 flushList.push_back(loc->first);
306 for (
size_t idx = 0; idx < flushList.size(); ++idx)
308 events_.erase(flushList[idx]);
310 TLOG_DEBUG(
"EventStore") <<
"Done flushing " << flushList.size()
311 <<
" stale events from the EventStore." << TLOG_ENDL;
// insert() computes event keys relative to lastFlushedSeqID_, so later
// fragments are keyed relative to the highest sequence ID seen so far.
313 lastFlushedSeqID_ = highestSeqIDSeen_;
314 return (flushList.size() >= initialStoreSize);
// startRun(run_id_t runID): waits (bounded) for the art thread to become
// ready, resets the sequence-ID bookkeeping, announces max_queue_size_ free
// slots via a routing token, and publishes the run number as a metric.
// The signature and some statements are not visible in this listing.
319 if (!queue_.queueReaderIsReady())
321 TLOG_WARNING(
"EventStore") <<
"Run start requested, but the art thread is not yet ready, waiting up to " << art_thread_wait_ms_ <<
" msec..." << TLOG_ENDL;
// The timeout is measured from reader_thread_launch_time_ (set in the
// constructor), not from the moment startRun was called.
322 while (!queue_.queueReaderIsReady() && std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - reader_thread_launch_time_).count() < art_thread_wait_ms_)
326 if (queue_.queueReaderIsReady())
328 auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(queue_.getReadyTime() - reader_thread_launch_time_).count();
329 TLOG_INFO(
"EventStore") <<
"art initialization took (roughly) " << std::setw(4) << std::to_string(dur) <<
" ms." << TLOG_ENDL;
// Not ready within the limit: log an error and start the run anyway.
333 auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - reader_thread_launch_time_).count();
334 TLOG_ERROR(
"EventStore") <<
"art thread still not ready after " << dur <<
" ms. Continuing to start..." << TLOG_ENDL;
// Reset the sequence-ID bookkeeping used by insert() and flushData().
339 lastFlushedSeqID_ = 0;
340 highestSeqIDSeen_ = 0;
// Send a routing token reporting max_queue_size_ free slots.
341 send_routing_token_(max_queue_size_);
342 TLOG_DEBUG(
"EventStore") <<
"Starting run " << run_id_
343 <<
", max queue size = "
345 <<
", queue capacity = "
348 << queue_.size() << TLOG_ENDL;
// Run and subrun are packed into one number: the subrun becomes the
// fractional part (subrun / 10000).
351 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
352 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1,
false);
// startSubrun(): only the metric update is visible in this listing (the
// subrun increment described in the API documentation is on a line not
// shown). Publishes run and subrun packed as run + subrun / 10000.
361 double runSubrun = run_id_ + ((double)subrun_id_ / 10000);
362 metricMan->sendMetric(
"Run Number", runSubrun,
"Run:Subrun", 1,
false);
// endRun(): builds an event (sequence ID 0) holding a single
// EndOfRunFragmentType fragment whose first data word is my_rank, and
// enqueues it for the art thread.
// Returns the result of enqTimedWait.
368 RawEvent_ptr endOfRunEvent(
new RawEvent(run_id_, subrun_id_, 0));
// The Fragment constructor argument is the number of Fragment::value_type
// words needed to hold my_rank, rounded up (presumably the payload size
// -- TODO confirm against the Fragment constructor).
369 std::unique_ptr<artdaq::Fragment>
371 Fragment(static_cast<size_t>
372 (ceil(
sizeof(my_rank) /
373 static_cast<double>(
sizeof(Fragment::value_type))))));
375 endOfRunFrag->setSystemType(Fragment::EndOfRunFragmentType);
376 *endOfRunFrag->dataBegin() = my_rank;
377 endOfRunEvent->insertFragment(std::move(endOfRunFrag));
379 return queue_.enqTimedWait(endOfRunEvent, enq_timeout_);
// endSubrun(): same construction as endRun, but the fragment is tagged
// EndOfSubrunFragmentType. Builds an event (sequence ID 0) whose single
// fragment stores my_rank in its first data word, and enqueues it.
// Returns the result of enqTimedWait.
384 RawEvent_ptr endOfSubrunEvent(
new RawEvent(run_id_, subrun_id_, 0));
// The Fragment constructor argument is the number of Fragment::value_type
// words needed to hold my_rank, rounded up (presumably the payload size
// -- TODO confirm against the Fragment constructor).
385 std::unique_ptr<artdaq::Fragment>
387 Fragment(static_cast<size_t>
388 (ceil(
sizeof(my_rank) /
389 static_cast<double>(
sizeof(Fragment::value_type))))));
391 endOfSubrunFrag->setSystemType(Fragment::EndOfSubrunFragmentType);
392 *endOfSubrunFrag->dataBegin() = my_rank;
393 endOfSubrunEvent->insertFragment(std::move(endOfSubrunFrag));
395 return queue_.enqTimedWait(endOfSubrunEvent, enq_timeout_);
// Ensures the MonitoredQuantity objects used by EventStore exist in the
// StatisticsCollection: each one is looked up and created only if the lookup
// returned null. The lookup/registration keys are on lines that are not
// visible in this listing (presumably EVENT_RATE_STAT_KEY and
// INCOMPLETE_EVENT_STAT_KEY).
399 EventStore::initStatistics_()
401 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
403 if (mqPtr.get() == 0)
// NOTE(review): the meaning of the (3.0, 300.0) constructor arguments is not
// established by this file -- see the MonitoredQuantity documentation.
405 mqPtr.reset(
new MonitoredQuantity(3.0, 300.0));
406 StatisticsCollection::getInstance().
// Same pattern for the second quantity.
411 mqPtr = StatisticsCollection::getInstance().
413 if (mqPtr.get() == 0)
415 mqPtr.reset(
new MonitoredQuantity(3.0, 300.0));
416 StatisticsCollection::getInstance().
// Writes summary statistics to text files, one per MonitoredQuantity that is
// found: an event-rate / data-rate report followed by an incomplete-event
// report. File names end in "_<rank, zero-padded to 4>.txt"; the leading part
// of each name is built on lines that are not visible in this listing.
423 EventStore::reportStatistics_()
// First report. The lookup key is on a line not visible in this listing.
425 MonitoredQuantityPtr mqPtr = StatisticsCollection::getInstance().
427 if (mqPtr.get() != 0)
431 <<
"_" << setfill(
'0') << setw(4) << my_rank <<
".txt";
432 std::string filename = oss.str();
433 ofstream outStream(filename.c_str());
// Wait for pending samples to be flushed into the statistics before reading
// them; 3.0 is the timeout argument (presumably seconds -- TODO confirm).
434 mqPtr->waitUntilAccumulatorsHaveBeenFlushed(3.0);
435 artdaq::MonitoredQuantityStats stats;
436 mqPtr->getStats(stats);
// Overall summary. Sample values are word counts, so they are multiplied by
// sizeof(RawDataType) and divided by 1024 twice to print MB.
437 outStream <<
"EventStore rank " << my_rank <<
": events processed = "
438 << stats.fullSampleCount <<
" at " << stats.fullSampleRate
439 <<
" events/sec, data rate = "
440 << (stats.fullValueRate *
sizeof(RawDataType)
441 / 1024.0 / 1024.0) <<
" MB/sec, duration = "
442 << stats.fullDuration <<
" sec" << std::endl
443 <<
" minimum event size = "
444 << (stats.fullValueMin *
sizeof(RawDataType)
446 <<
" MB, maximum event size = "
447 << (stats.fullValueMax *
sizeof(RawDataType)
449 <<
" MB" << std::endl;
// Per-bin breakdown. foundTheStart is set at the first bin with a non-zero
// duration.
450 bool foundTheStart =
false;
451 for (
int idx = 0; idx < (int)stats.recentBinnedDurations.size(); ++idx)
453 if (stats.recentBinnedDurations[idx] > 0.0)
455 foundTheStart =
true;
459 outStream <<
" " << std::fixed << std::setprecision(3)
460 << stats.recentBinnedEndTimes[idx]
461 <<
": " << stats.recentBinnedSampleCounts[idx]
463 << (stats.recentBinnedSampleCounts[idx] /
464 stats.recentBinnedDurations[idx])
465 <<
" events/sec, data rate = "
466 << (stats.recentBinnedValueSums[idx] *
467 sizeof(RawDataType) / 1024.0 / 1024.0 /
468 stats.recentBinnedDurations[idx])
469 <<
" MB/sec, bin size = "
470 << stats.recentBinnedDurations[idx]
471 <<
" sec" << std::endl;
// Second report. The lookup key is on a line not visible in this listing.
477 mqPtr = StatisticsCollection::getInstance().
479 if (mqPtr.get() != 0)
483 << setw(4) << run_id_
484 <<
"_" << setfill(
'0') << setw(4) << my_rank <<
".txt";
485 std::string filename = oss.str();
486 ofstream outStream(filename.c_str());
487 mqPtr->waitUntilAccumulatorsHaveBeenFlushed(3.0);
488 artdaq::MonitoredQuantityStats stats;
489 mqPtr->getStats(stats);
// Overall summary: here the sample values are incomplete-event counts.
490 outStream <<
"EventStore rank " << my_rank <<
": fragments processed = "
491 << stats.fullSampleCount <<
" at " << stats.fullSampleRate
492 <<
" fragments/sec, average incomplete event count = "
493 << stats.fullValueAverage <<
" duration = "
494 << stats.fullDuration <<
" sec" << std::endl
495 <<
" minimum incomplete event count = "
496 << stats.fullValueMin <<
", maximum incomplete event count = "
497 << stats.fullValueMax << std::endl;
498 bool foundTheStart =
false;
499 for (
int idx = 0; idx < (int)stats.recentBinnedDurations.size(); ++idx)
501 if (stats.recentBinnedDurations[idx] > 0.0)
503 foundTheStart =
true;
// Bins with no samples are skipped, which also keeps the per-bin average
// below from dividing by zero.
505 if (foundTheStart && stats.recentBinnedSampleCounts[idx] > 0.0)
507 outStream <<
" " << std::fixed << std::setprecision(3)
508 << stats.recentBinnedEndTimes[idx]
509 <<
": " << stats.recentBinnedSampleCounts[idx]
511 << (stats.recentBinnedSampleCounts[idx] /
512 stats.recentBinnedDurations[idx])
513 <<
" fragments/sec, average incomplete event count = "
514 << (stats.recentBinnedValueSums[idx] /
515 stats.recentBinnedSampleCounts[idx])
517 << stats.recentBinnedDurations[idx]
518 <<
" sec" << std::endl;
// Snapshot of the number of events currently held in events_.
521 outStream <<
"Incomplete count now = " << events_.size() << std::endl;
// Creates and configures the UDP socket used to send data requests to
// request_address on request_port_. Several lines (braces, conditions and the
// statements following each error log) are not visible in this listing.
527 EventStore::setup_requests_(std::string request_address)
531 request_socket_ = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
// NOTE(review): socket() returns -1 on failure, so this test is true only
// for descriptor 0 and does not detect a failed call. The check should
// probably be request_socket_ < 0.
532 if (!request_socket_)
534 TLOG_ERROR(
"EventStore") <<
"I failed to create the socket for sending Data Requests!" << TLOG_ENDL;
// Resolve the destination of the requests into request_addr_.
537 int sts =
ResolveHost(request_address.c_str(), request_port_, request_addr_);
540 TLOG_ERROR(
"EventStore") <<
"Unable to resolve Data Request address" << TLOG_ENDL;
// An explicit outgoing interface is only resolved when "output_address" is
// something other than its default, "localhost".
544 if (multicast_out_addr_ !=
"localhost") {
546 int sts =
ResolveHost(multicast_out_addr_.c_str(), addr);
549 TLOG_ERROR(
"EventStore") <<
"Unable to resolve multicast interface address" << TLOG_ENDL;
554 if (setsockopt(request_socket_, SOL_SOCKET, SO_REUSEADDR, &yes,
sizeof(yes)) < 0)
556 TLOG_ERROR(
"EventStore") <<
"Unable to enable port reuse on request socket" << TLOG_ENDL;
// Select the interface used for outgoing multicast datagrams.
559 if (setsockopt(request_socket_, IPPROTO_IP, IP_MULTICAST_IF, &addr,
sizeof(addr)) == -1)
561 TLOG_ERROR(
"EventStore") <<
"Cannot set outgoing interface." << TLOG_ENDL;
566 if (setsockopt(request_socket_, SOL_SOCKET, SO_BROADCAST, (
void*)&yes,
sizeof(
int)) == -1)
568 TLOG_ERROR(
"EventStore") <<
"Cannot set request socket to broadcast." << TLOG_ENDL;
// Opens the TCP connection used to send routing tokens to
// token_address_:token_port_. Does nothing unless routing tokens are enabled
// ("use_routing_master" in the routing_token_config table).
575 EventStore::setup_tokens_()
577 if (send_routing_tokens_)
579 TLOG_DEBUG(
"EventStore") <<
"Creating Routing Token sending socket" << TLOG_ENDL;
580 token_socket_ =
TCPConnect(token_address_.c_str(), token_port_);
// The failure condition that guards this message is not visible in this
// listing.
583 TLOG_ERROR(
"EventStore") <<
"I failed to create the socket for sending Routing Tokens!" << TLOG_ENDL;
// Sends the current set of active requests as two UDP datagrams: first the
// RequestHeader, then the array of RequestPackets. Started on its own thread
// by send_request_().
589 void EventStore::do_send_request_()
// NOTE(review): request_delay_ is read from the "request_delay_ms" key in
// the constructor but is interpreted as microseconds here -- confirm which
// unit is intended.
591 std::this_thread::sleep_for(std::chrono::microseconds(request_delay_));
593 detail::RequestMessage message;
// Copy the active requests into the message while holding request_mutex_.
595 std::lock_guard<std::mutex> lk(request_mutex_);
596 for (
auto& req : active_requests_)
598 message.addRequest(req.first, req.second);
601 message.header()->mode = request_mode_;
// Text form of the destination address, used only for the log message.
602 char str[INET_ADDRSTRLEN];
603 inet_ntop(AF_INET, &(request_addr_.sin_addr), str, INET_ADDRSTRLEN);
604 TLOG_DEBUG(
"EventStore") <<
"Sending request for " << std::to_string(message.size()) <<
" events to multicast group " << str << TLOG_ENDL;
// Datagram 1: the fixed-size header.
605 if (sendto(request_socket_, message.header(),
sizeof(detail::RequestHeader), 0, (
struct sockaddr *)&request_addr_,
sizeof(request_addr_)) < 0)
607 TLOG_ERROR(
"EventStore") <<
"Error sending request message header" << TLOG_ENDL;
// Datagram 2: message.size() RequestPacket entries.
609 if (sendto(request_socket_, message.buffer(),
sizeof(detail::RequestPacket) * message.size(), 0, (
struct sockaddr *)&request_addr_,
sizeof(request_addr_)) < 0)
611 TLOG_ERROR(
"EventStore") <<
"Error sending request message data" << TLOG_ENDL;
// Sends one RoutingToken announcing nSlots newly free slots for this rank.
// No-op when routing tokens are disabled; the token socket is set up on first
// use.
615 void EventStore::send_routing_token_(
int nSlots)
617 TLOG_DEBUG(
"EventStore") <<
"send_routing_token_ called, send_routing_tokens_=" << std::boolalpha << send_routing_tokens_ << TLOG_ENDL;
618 if (!send_routing_tokens_)
return;
// -1 is treated as "socket not set up yet".
619 if (token_socket_ == -1) setup_tokens_();
620 detail::RoutingToken token;
621 token.header = TOKEN_MAGIC;
622 token.rank = my_rank;
623 token.new_slots_free = nSlots;
625 TLOG_DEBUG(
"EventStore") <<
"Sending RoutingToken to " << token_address_ <<
":" << token_port_ << TLOG_ENDL;
// Keep calling send() until the whole token has been written; sts is used as
// the byte offset into the token. Its declaration and the handling of the
// send() result are on lines not visible in this listing.
// NOTE(review): if sts is a signed type, this comparison mixes signed and
// unsigned operands, and a -1 result from send() must not be added to it
// -- confirm against the lines that are not shown.
627 while (sts <
sizeof(detail::RoutingToken)) {
628 auto res = send(token_socket_, reinterpret_cast<uint8_t*>(&token) + sts,
sizeof(detail::RoutingToken) - sts, 0);
635 TLOG_DEBUG(
"EventStore") <<
"Done sending RoutingToken to " << token_address_ <<
":" << token_port_ << TLOG_ENDL;
// Starts do_send_request_() on a new thread. The [=] capture in a member
// function captures this by pointer, so the EventStore must outlive the
// thread. How the thread is joined or detached is on a line not visible in
// this listing.
639 EventStore::send_request_()
641 std::thread request([=] { do_send_request_(); });
// sendMetrics(): publishes the current incomplete-event count and, when
// enabled, logs a rate-limited list of the incomplete events. The signature
// and some lines are not visible in this listing.
650 metricMan->sendMetric(
"Incomplete Event Count", events_.size(),
// The report is disabled for intervals <= 0 (the configured default is -1)
// and is skipped when there are no incomplete events.
653 if (incomplete_event_report_interval_ms_ > 0 && events_.size())
// Rate limit: at most one report per incomplete_event_report_interval_ms_.
655 if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - last_incomplete_event_report_time_).count() < incomplete_event_report_interval_ms_)
return;
656 last_incomplete_event_report_time_ = std::chrono::steady_clock::now();
// Format: "Incomplete Events (<expected fragments>): <key> (<fragments so far>), ..."
657 std::ostringstream oss;
658 oss <<
"Incomplete Events (" << num_fragments_per_event_ <<
"): ";
659 for (
auto& ev : events_)
661 oss << ev.first <<
" (" << ev.second->numFragments() <<
"), ";
663 TLOG_DEBUG(
"EventStore") << oss.str() << TLOG_ENDL;
void insert(FragmentPtr pfrag, bool printWarningWhenFragmentIsDropped=true)
Give ownership of the Fragment to the EventStore.
static const std::string EVENT_RATE_STAT_KEY
Key for the Event Rate MonitoredQuantity.
int ResolveHost(char const *host_in, in_addr &addr)
Convert a string hostname to an in_addr suitable for socket communication.
int TCPConnect(char const *host_in, int dflt_port, long flags=0, int sndbufsiz=0)
Connect to a host on a given port.
bool endOfData(int &readerReturnValue)
Indicate that the end of input has been reached to the art thread.
EventStoreInsertResult
This enumeration contains possible status codes of insertion attempts.
void startRun(run_id_t runID)
Start a Run.
EventStore()=delete
Default Constructor is deleted.
virtual ~EventStore()
EventStore Destructor.
The Fragment was successfully inserted.
void startSubrun()
Start a new Subrun, incrementing the subrun number.
The EventStore class collects Fragment objects, until it receives a complete event, at which point the event is handed over to the art thread.
static const std::string INCOMPLETE_EVENT_STAT_KEY
Key for the Incomplete Events MonitoredQuantity.
The EventStore is full, but the Fragment was accepted as it is for an already-open event...
void setSeqIDModulus(unsigned int seqIDModulus)
Set the parameter that will be used to determine which sequence IDs get grouped together into events...
void sendMetrics()
Send metrics to the MetricManager, if one has been instantiated in the application.
bool flushData()
Push any incomplete events onto the queue.
RawEvent::run_id_t run_id_t
Copy RawEvent::run_id_t into local scope.
The EventStore is full, and the Fragment was rejected.
The Fragment was rejected, because the RawEventQueue is full.
bool endRun()
Send an EndOfRunFragment to the art thread.
bool endSubrun()
Send an EndOfSubRunFragment to the art thread.