1 #include "artdaq/DAQdata/Globals.hh"
2 #define TRACE_NAME (app_name + "_DataSenderManager").c_str()
3 #include "artdaq/DAQdata/HostMap.hh"
4 #include "artdaq/DAQrate/DataSenderManager.hh"
5 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
8 #include <netinet/in.h>
10 #include <sys/socket.h>
11 #include <sys/types.h>
14 #include "canvas/Utilities/Exception.h"
18 , broadcast_sends_(pset.get<bool>(
"broadcast_sends", false))
19 , non_blocking_mode_(pset.get<bool>(
"nonblocking_sends", false))
20 , send_timeout_us_(pset.get<size_t>(
"send_timeout_usec", 5000000))
21 , send_retry_count_(pset.get<size_t>(
"send_retry_count", 2))
23 , highest_sequence_id_routed_(0)
25 TLOG(TLVL_DEBUG + 32) <<
"Received pset: " << pset.to_string();
28 if (send_timeout_us_ == 0)
30 send_timeout_us_ = std::numeric_limits<size_t>::max();
33 auto rmConfig = pset.get<fhicl::ParameterSet>(
"routing_table_config", fhicl::ParameterSet());
37 auto tcp_send_buffer_size = pset.get<
size_t>(
"tcp_send_buffer_size", 0);
38 auto max_fragment_size_words = pset.get<
size_t>(
"max_fragment_size_words", 0);
40 auto dests = pset.get<fhicl::ParameterSet>(
"destinations", fhicl::ParameterSet());
41 for (
auto& d : dests.get_pset_names())
43 auto dest_pset = dests.get<fhicl::ParameterSet>(d);
47 fhicl::ParameterSet dests_mod;
48 for (
auto& d : dests.get_pset_names())
50 auto dest_pset = dests.get<fhicl::ParameterSet>(d);
51 dest_pset.erase(
"host_map");
52 dest_pset.put<std::vector<fhicl::ParameterSet>>(
"host_map", host_map_pset);
54 if (tcp_send_buffer_size != 0 && !dest_pset.has_key(
"tcp_send_buffer_size"))
56 dest_pset.put<
size_t>(
"tcp_send_buffer_size", tcp_send_buffer_size);
58 if (max_fragment_size_words != 0 && !dest_pset.has_key(
"max_fragment_size_words"))
60 dest_pset.put<
size_t>(
"max_fragment_size_words", max_fragment_size_words);
63 dests_mod.put<fhicl::ParameterSet>(d, dest_pset);
66 for (
auto& d : dests_mod.get_pset_names())
71 auto destination_rank = transfer->destination_rank();
72 destinations_.emplace(destination_rank, std::move(transfer));
74 catch (
const std::invalid_argument&)
76 TLOG(TLVL_DEBUG + 32) <<
"Invalid destination specification: " << d;
78 catch (
const cet::exception& ex)
80 TLOG(TLVL_WARNING) <<
"Caught cet::exception: " << ex.what();
84 TLOG(TLVL_WARNING) <<
"Non-cet exception while setting up TransferPlugin: " << d <<
".";
87 if (destinations_.empty())
89 TLOG(TLVL_ERROR) <<
"No destinations specified!";
93 auto enabled_dests = pset.get<std::vector<size_t>>(
"enabled_destinations", std::vector<size_t>());
94 if (enabled_dests.empty())
96 TLOG(TLVL_INFO) <<
"enabled_destinations not specified, assuming all destinations enabled.";
97 for (
auto& d : destinations_)
99 enabled_destinations_.insert(d.first);
104 for (
auto& d : enabled_dests)
106 enabled_destinations_.insert(d);
114 TLOG(TLVL_DEBUG + 32) <<
"Shutting down DataSenderManager BEGIN";
116 for (
auto& dest : enabled_destinations_)
118 if (destinations_.count(dest) != 0u)
120 auto sts = destinations_[dest]->transfer_fragment_reliable_mode(std::move(*Fragment::eodFrag(sent_frag_count_.slotCount(dest))));
123 TLOG(TLVL_ERROR) <<
"Error sending EOD Fragment to sender rank " << dest;
128 TLOG(TLVL_DEBUG + 32) <<
"Shutting down DataSenderManager END. Sent " << count() <<
" fragments.";
133 return table_receiver_->GetRoutingTableEntryCount();
138 return table_receiver_->GetRemainingRoutingTableEntries();
// Selects the destination rank for a Fragment with the given sequence ID.
//
// NOTE(review): this listing omits several original lines (braces, the body
// of the empty-destinations check, the iterator advance and the final
// return), so the notes below describe only the statements visible here.
//
// @param sequence_id Sequence ID of the Fragment being routed.
// @return A value obtained from the routing table, or an element of
//         enabled_destinations_.
141 int artdaq::DataSenderManager::calcDest_(Fragment::sequence_id_t sequence_id)
const
// Guard for the case where no destinations are enabled; the value returned on
// this path is not visible in this listing.
143 if (enabled_destinations_.empty())
// When the table receiver reports that a routing manager is enabled, the
// routing table entry for this sequence ID is returned.
148 if (table_receiver_->RoutingManagerEnabled())
150 TLOG(TLVL_DEBUG + 35) <<
"calcDest_ use_routing_manager check for routing info for seqID=" << sequence_id <<
" should_stop_=" << should_stop_;
151 return table_receiver_->GetRoutingTableEntry(sequence_id);
// With exactly one enabled destination there is nothing to choose between.
153 if (enabled_destinations_.size() == 1)
155 return *enabled_destinations_.begin();
// Otherwise reduce the sequence ID modulo the number of enabled destinations
// and count that index down to zero while walking the container; presumably
// `it` is advanced once per pass (the advance statement is not shown here).
157 auto index = sequence_id % enabled_destinations_.size();
158 auto it = enabled_destinations_.begin();
159 for (; index > 0; --index)
// Wrap back to the first element if the walk reaches the end.
162 if (it == enabled_destinations_.end())
164 it = enabled_destinations_.begin();
172 TLOG(TLVL_DEBUG + 35) <<
"RemoveRoutingTableEntry: Removing sequence ID " << seq <<
" from routing table. Sent " << GetSentSequenceIDCount(seq) <<
" Fragments with this Sequence ID.";
173 table_receiver_->RemoveRoutingTableEntry(seq);
175 std::unique_lock<std::mutex> lck(sent_sequence_id_mutex_);
176 if (sent_sequence_id_count_.find(seq) != sent_sequence_id_count_.end())
178 sent_sequence_id_count_.erase(sent_sequence_id_count_.find(seq));
184 std::unique_lock<std::mutex> lck(sent_sequence_id_mutex_);
185 if (sent_sequence_id_count_.count(seq) == 0u)
189 return sent_sequence_id_count_[seq];
196 auto start_time = std::chrono::steady_clock::now();
197 if (frag.type() == Fragment::EndOfDataFragmentType)
199 throw cet::exception(
"LogicError")
200 <<
"EOD fragments should not be sent on as received: "
201 <<
"use sendEODFrag() instead.";
203 size_t seqID = frag.sequenceID();
204 size_t fragSize = frag.sizeBytes();
205 auto latency_s = frag.getLatency(
true);
206 auto isSystemBroadcast = frag.type() == Fragment::EndOfRunFragmentType || frag.type() == Fragment::EndOfSubrunFragmentType || frag.type() == Fragment::InitFragmentType;
208 double latency = latency_s.tv_sec + (latency_s.tv_nsec / 1000000000.0);
209 TLOG(TLVL_DEBUG + 36) <<
"sendFragment start frag.fragmentHeader()=" << std::hex << static_cast<void*>(frag.headerBeginBytes()) <<
", szB=" << std::dec << fragSize
210 <<
", seqID=" << seqID <<
", fragID=" << frag.fragmentID() <<
", type=" << frag.typeString();
213 if (broadcast_sends_ || isSystemBroadcast)
215 for (
auto& bdest : enabled_destinations_)
217 TLOG(TLVL_DEBUG + 33) <<
"sendFragment: Sending fragment with seqId " << seqID <<
" to destination " << bdest <<
" (broadcast)";
223 if (!non_blocking_mode_)
225 sts = destinations_[bdest]->transfer_fragment_reliable_mode(Fragment(frag));
229 sts = destinations_[bdest]->transfer_fragment_min_blocking_mode(frag, send_timeout_us_);
237 sent_frag_count_.incSlot(bdest);
240 else if (non_blocking_mode_)
242 dest = calcDest_(seqID);
245 TLOG(TLVL_WARNING) <<
"Could not get destination for seqID " << seqID;
250 TLOG(TLVL_DEBUG + 33) <<
"sendFragment: Sending fragment with seqId " << seqID <<
" to destination " << dest;
252 auto lastWarnTime = std::chrono::steady_clock::now();
256 sts = destinations_[dest]->transfer_fragment_min_blocking_mode(frag, send_timeout_us_);
259 TLOG(TLVL_WARNING) <<
"sendFragment: Sending fragment " << seqID <<
" to destination " << dest <<
" failed! Retrying...";
260 lastWarnTime = std::chrono::steady_clock::now();
269 sent_frag_count_.incSlot(dest);
271 else if (!should_stop_)
273 TLOG(TLVL_ERROR) <<
"(in non_blocking) calcDest returned invalid destination rank " << dest <<
"! This event has been lost: " << seqID
274 <<
". enabled_destinantions_.size()=" << enabled_destinations_.size();
279 auto start = std::chrono::steady_clock::now();
282 dest = calcDest_(seqID);
285 TLOG(TLVL_WARNING) <<
"Could not get destination for seqID " << seqID <<
", send number " << sent_frag_count_.count() <<
", retrying. Waited " << TimeUtils::GetElapsedTime(start) <<
" s for routing information.";
291 TLOG(TLVL_DEBUG + 34) <<
"DataSenderManager::sendFragment: Sending fragment with seqId " << seqID <<
" to destination " << dest;
294 sts = destinations_[dest]->transfer_fragment_reliable_mode(std::move(frag));
297 TLOG(TLVL_ERROR) <<
"sendFragment: Sending fragment " << seqID <<
" to destination "
298 << dest <<
" failed! Data has been lost!";
302 sent_frag_count_.incSlot(dest);
305 else if (!should_stop_)
307 TLOG(TLVL_ERROR) <<
"calcDest returned invalid destination rank " << dest <<
"! This event has been lost: " << seqID
308 <<
". enabled_destinantions_.size()=" << enabled_destinations_.size();
312 if (!isSystemBroadcast)
314 std::unique_lock<std::mutex> lck(sent_sequence_id_mutex_);
315 sent_sequence_id_count_[seqID]++;
318 auto delta_t = TimeUtils::GetElapsedTime(start_time);
322 TLOG(TLVL_DEBUG + 34) <<
"sendFragment: sending metrics";
323 metricMan->sendMetric(
"Data Send Time to Rank " + std::to_string(dest), delta_t,
"s", 5, MetricMode::Accumulate);
324 metricMan->sendMetric(
"Data Send Size to Rank " + std::to_string(dest), fragSize,
"B", 5, MetricMode::Accumulate | MetricMode::Maximum);
325 metricMan->sendMetric(
"Data Send Rate to Rank " + std::to_string(dest), fragSize / delta_t,
"B/s", 5, MetricMode::Average);
326 metricMan->sendMetric(
"Data Send Count to Rank " + std::to_string(dest), sent_frag_count_.slotCount(dest),
"fragments", 3, MetricMode::LastPoint);
328 metricMan->sendMetric(
"Rank", std::to_string(my_rank),
"", 3, MetricMode::LastPoint);
329 metricMan->sendMetric(
"App Name", app_name,
"", 3, MetricMode::LastPoint);
331 metricMan->sendMetric(
"Fragment Latency at Send", latency,
"s", 4, MetricMode::Average | MetricMode::Maximum);
334 TLOG(TLVL_DEBUG + 34) <<
"sendFragment: Done sending fragment " << seqID <<
" to dest=" << dest;
335 return std::make_pair(dest, outsts);
void RemoveRoutingTableEntry(Fragment::sequence_id_t seq)
Remove the given sequence ID from the routing table and sent_count lists.
The send operation timed out.
virtual ~DataSenderManager()
DataSenderManager Destructor.
static constexpr int ROUTING_FAILED
Value used to indicate that a route was not properly generated.
std::unique_ptr< artdaq::TransferInterface > MakeTransferPlugin(const fhicl::ParameterSet &pset, const std::string &plugin_label, TransferInterface::Role role)
Load a TransferInterface plugin.
size_t GetSentSequenceIDCount(Fragment::sequence_id_t seq)
Get the number of Fragments sent with a given Sequence ID.
std::pair< int, TransferInterface::CopyStatus > sendFragment(Fragment &&frag)
Send the given Fragment. Return the rank of the destination to which the Fragment was sent...
size_t GetRemainingRoutingTableEntries() const
Gets the number of sends remaining in the routing table, in case other parts of the system want to use this information.
DataSenderManager(const fhicl::ParameterSet &ps)
DataSenderManager Constructor.
std::vector< fhicl::ParameterSet > MakeHostMapPset(std::map< int, std::string > input)
Create a list of HostMap::HostConfig ParameterSets from a hostMap_t map
size_t GetRoutingTableEntryCount() const
Gets the current size of the Routing Table, in case other parts of the system want to use this information.
This TransferInterface is a Sender.
Some error occurred, but no exception was thrown.
The send operation completed successfully.
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
std::map< int, std::string > hostMap_t
The host_map is a map associating ranks with artdaq::DestinationInfo objects.
hostMap_t MakeHostMap(fhicl::ParameterSet const &pset, hostMap_t map=hostMap_t())
Make a hostMap_t from a HostMap::Config ParameterSet
CopyStatus
Returned from the send functions, this enumeration describes the possible return codes. If an exception occurs, it will be thrown and should be handled normally.