1 #include "proto/TransferTest.hh"
3 #include "artdaq-core/Data/Fragment.hh"
4 #include "proto/FragmentReceiverManager.hh"
5 #include "artdaq/DAQrate/DataSenderManager.hh"
7 #include "artdaq/DAQdata/Globals.hh"
9 #include "fhiclcpp/make_ParameterSet.h"
12 : senders_(psi.get<int>(
"num_senders"))
13 , receivers_(psi.get<int>(
"num_receivers"))
14 , sends_each_sender_(psi.get<int>(
"sends_per_sender"))
15 , receives_each_receiver_(senders_ * sends_each_sender_ / receivers_)
16 , buffer_count_(psi.get<int>(
"buffer_count", 10))
17 , max_payload_size_(psi.get<size_t>(
"fragment_size", 0x100000))
19 , validate_mode_(psi.get<bool>(
"validate_data_mode", false))
21 TLOG_ARB(10,
"TransferTest") <<
"CONSTRUCTOR" << TLOG_ENDL;
22 metricMan = &metricMan_;
24 fhicl::ParameterSet metric_pset;
28 metric_pset = psi.get<fhicl::ParameterSet>(
"metrics");
34 std::string name =
"TransferTest" + std::to_string(my_rank);
35 metricMan_.initialize(metric_pset, name);
36 metricMan_.do_start();
40 std::string type(psi.get<std::string>(
"transfer_plugin_type",
"Shmem"));
44 if (senders_ * sends_each_sender_ % receivers_ != 0)
46 std::cout <<
"Adding sends so that sends_each_sender * num_sending_ranks is a multiple of num_receiving_ranks" << std::endl;
47 while (senders_ * sends_each_sender_ % receivers_ != 0)
51 receives_each_receiver_ = senders_ * sends_each_sender_ / receivers_;
52 std::cout <<
"sends_each_sender is now " << sends_each_sender_ << std::endl;
53 psi.put_or_replace(
"sends_per_sender", sends_each_sender_);
57 std::string hostmap =
"";
58 if (psi.has_key(
"hostmap"))
60 auto masterPortOffset = (key % (36728 - 10240)) + 1024;
61 hostmap =
" host_map: @local::hostmap master_port_offset: " + std::to_string(masterPortOffset);
65 ss << psi.to_string();
67 for (
int ii = 0; ii < senders_; ++ii)
69 ss <<
"s" << ii <<
": { transferPluginType: " << type <<
" source_rank: " << ii <<
" max_fragment_size_words: " << max_payload_size_ <<
" buffer_count: " << buffer_count_ <<
" shm_key_offset: " << std::to_string(key) << hostmap <<
"}";
71 ss <<
"} destinations: {";
72 for (
int jj = senders_; jj < senders_ + receivers_; ++jj)
74 ss <<
"d" << jj <<
": { transferPluginType: " << type <<
" destination_rank: " << jj <<
" max_fragment_size_words: " << max_payload_size_ <<
" buffer_count: " << buffer_count_ <<
" shm_key_offset: " << std::to_string(key) << hostmap <<
"}";
78 make_ParameterSet(ss.str(), ps_);
81 std::cout <<
"Going to configure with ParameterSet: " << ps_.to_string() << std::endl;
86 TLOG_ARB(11,
"TransferTest") <<
"runTest BEGIN" << TLOG_ENDL;
87 start_time_ = std::chrono::steady_clock::now();
88 std::pair<size_t, double> result;
89 if (my_rank >= senders_ + receivers_)
return 0;
90 if (my_rank < senders_)
92 result = do_sending();
96 result = do_receiving();
98 auto duration = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(std::chrono::steady_clock::now() - start_time_).count();
99 std::cout << (my_rank < senders_ ?
"Sent " :
"Received ") << result.first <<
" bytes in " << duration <<
" seconds ( " << formatBytes(result.first / duration) <<
"/s )." << std::endl;
100 std::cout <<
"Rate of " << (my_rank < senders_ ?
"sending" :
"receiving") <<
": " << formatBytes(result.first / result.second) <<
"/s." << std::endl;
101 metricMan_.do_stop();
102 metricMan_.shutdown();
103 TLOG_ARB(11,
"TransferTest") <<
"runTest DONE" << TLOG_ENDL;
// Sender side of the transfer test.
// Builds a Fragment, hands it to `sender.sendFragment` sends_each_sender_ times, and returns
// {sum of frag.sizeBytes() over all sends, sum of the time measured around sendFragment}.
// NOTE(review): this listing omits some original lines (braces and the declaration of `sender`),
// so any conditions guarding the statements below are not visible here.
107 std::pair<size_t, double> artdaq::TransferTest::do_sending()
109 TLOG_ARB(7,
"TransferTest") <<
"do_sending entered RawFragmentHeader::num_words()=" << std::to_string(artdaq::detail::RawFragmentHeader::num_words()) << TLOG_ENDL;
111 size_t totalSize = 0;
112 double totalTime = 0;
// Payload length in RawDataType words: configured byte size converted to words, minus the header words.
// NOTE(review): the subtraction is done in unsigned arithmetic (sizeof yields size_t); if the header is
// larger than the configured size the result appears to wrap to a huge value that the `< 8` clamp
// below would not catch — confirm that configurations this small are rejected elsewhere.
115 unsigned data_size_wrds = max_payload_size_ /
sizeof(artdaq::RawDataType) - artdaq::detail::RawFragmentHeader::num_words();
// Enforce a minimum payload of 8 words.
116 if (data_size_wrds < 8) data_size_wrds = 8;
117 artdaq::Fragment frag(data_size_wrds);
// Fill the payload with 1, 2, 3, ... (pre-increment of a seed that starts at 0).
121 artdaq::RawDataType gen_seed = 0;
123 std::generate_n(frag.dataBegin(), data_size_wrds, [&]() {
return ++gen_seed; });
// Read the payload back and log an error if word ii is not ii + 1.
124 for (
size_t ii = 0; ii < frag.dataSize(); ++ii)
126 if (*(frag.dataBegin() + ii) != ii + 1)
128 TLOG_ERROR(
"TransferTest") <<
"Data corruption detected! (" << std::to_string(*(frag.dataBegin() + ii)) <<
" != " << std::to_string(ii + 1) <<
") Aborting!" << TLOG_ENDL;
// Report metrics every sends_each_sender_ / 1000 sends, but at least every send.
134 int metric_send_interval = sends_each_sender_ / 1000 > 1 ? sends_each_sender_ / 1000 : 1;
// Per-interval accumulators (doubles); reset after each metric report.
135 auto init_time_metric = 0.0;
136 auto send_time_metric = 0.0;
137 auto after_time_metric = 0.0;
138 auto send_size_metric = 0.0;
140 for (
int ii = 0; ii < sends_each_sender_; ++ii)
142 auto loop_start = std::chrono::steady_clock::now();
143 TLOG_ARB(7,
"TransferTest") <<
"sender rank " << my_rank <<
" #" << ii <<
" resized bytes=" << std::to_string(frag.sizeBytes()) << TLOG_ENDL;
// The byte count is taken before the fragment is moved into sendFragment.
144 totalSize += frag.sizeBytes();
// Stamp the header: sequence ID is the send index, fragment ID is this process's rank.
147 frag.setSequenceID(ii);
148 frag.setFragmentID(my_rank);
149 frag.setSystemType(artdaq::Fragment::DataFragmentType);
// Only the sendFragment call itself is bracketed by send_start / after_send.
156 auto send_start = std::chrono::steady_clock::now();
157 sender.sendFragment(std::move(frag));
158 auto after_send = std::chrono::steady_clock::now();
159 TLOG_TRACE(
"TransferTest") <<
"Sender " << my_rank <<
" sent fragment " << ii << TLOG_ENDL;
// frag was moved from above; assign a fresh Fragment of the same payload size for the next send.
162 frag = artdaq::Fragment(data_size_wrds);
// Fill the new payload with ii + 2, ii + 3, ... so word jj holds (ii + 1) + jj + 1.
165 artdaq::RawDataType gen_seed = ii + 1;
167 std::generate_n(frag.dataBegin(), data_size_wrds, [&]() {
return ++gen_seed; });
// Read the payload back and log an error on any mismatch.
// NOTE(review): the message prints `ii` (the send index) after "at position", while the word index in
// this loop is `jj`; the corresponding message in do_receiving prints the word index — confirm intent.
168 for (
size_t jj = 0; jj < frag.dataSize(); ++jj)
170 if (*(frag.dataBegin() + jj) != (ii + 1) + jj + 1)
172 TLOG_ERROR(
"TransferTest") <<
"Input Data corruption detected! (" << std::to_string(*(frag.dataBegin() + jj)) <<
" != " << std::to_string(ii + jj + 2) <<
" at position " << ii <<
") Aborting!" << TLOG_ENDL;
177 TLOG_ARB(9,
"TransferTest") <<
"sender rank " << my_rank <<
" frag replaced" << TLOG_ENDL;
// Timing bookkeeping: init = loop_start..send_start, send = send_start..after_send,
// after = after_send..now (covers the fragment rebuild above).
179 auto total_send_time = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_send - send_start).count();
180 totalTime += total_send_time;
181 send_time_metric += total_send_time;
// The size metric counts payload bytes only (data_size_wrds words), unlike totalSize which uses sizeBytes().
182 send_size_metric += data_size_wrds *
sizeof(artdaq::RawDataType);
183 after_time_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(std::chrono::steady_clock::now() - after_send).count();
184 init_time_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(send_start - loop_start).count();
// Every metric_send_interval sends: publish the accumulated values, then reset the accumulators.
186 if (metricMan && ii % metric_send_interval == 0)
188 metricMan->sendMetric(
"send_init_time", init_time_metric,
"seconds", 3, MetricMode::Accumulate);
189 metricMan->sendMetric(
"total_send_time", send_time_metric,
"seconds", 3, MetricMode::Accumulate);
190 metricMan->sendMetric(
"after_send_time", after_time_metric,
"seconds", 3, MetricMode::Accumulate);
// NOTE(review): send_time_metric is the divisor; if the measured time is 0 the reported rate is inf/NaN.
191 metricMan->sendMetric(
"send_rate", send_size_metric / send_time_metric,
"B/s", 3, MetricMode::Average);
192 init_time_metric = 0.0;
193 send_time_metric = 0.0;
194 after_time_metric = 0.0;
195 send_size_metric = 0.0;
// Result: {bytes counted across all sends, time accumulated inside sendFragment}.
199 return std::make_pair(totalSize, totalTime);
// Receiver side of the transfer test.
// Repeatedly calls `receiver.recvFragment` while activeSenders > 0 and returns
// {sum of received fragment sizes in bytes, sum of the time measured around recvFragment}.
// NOTE(review): this listing omits some original lines (braces, else branches, the declarations of
// `receiver`, `senderSlot` and `thisSize`, and whatever updates `counter` / `activeSenders`),
// so the conditions guarding several statements below are not visible here.
202 std::pair<size_t, double> artdaq::TransferTest::do_receiving()
204 TLOG_ARB(7,
"TransferTest") <<
"do_receiving entered" << TLOG_ENDL;
207 receiver.start_threads();
// counter starts at the number of fragments this receiver expects; it is used in log messages and
// in the metric-reporting test below.
208 int counter = receives_each_receiver_;
209 size_t totalSize = 0;
210 double totalTime = 0;
212 int activeSenders = senders_;
// end_loop marks the end of the previous iteration; initialised to "now" for the first pass.
213 auto end_loop = std::chrono::steady_clock::now();
// Per-interval accumulators (doubles); reset after each metric report.
215 auto recv_size_metric = 0.0;
216 auto recv_time_metric = 0.0;
217 auto input_wait_metric = 0.0;
218 auto init_wait_metric = 0.0;
// NOTE(review): when receives_each_receiver_ / 1000 > 1 this yields receives_each_receiver_ itself,
// not receives_each_receiver_ / 1000 as the matching line in do_sending does. With this value the
// `counter % metric_send_interval == 0` test below only fires when counter is a multiple of the full
// receive count — looks like a missing "/ 1000"; confirm.
219 int metric_send_interval = receives_each_receiver_ / 1000 > 1 ? receives_each_receiver_ : 1;
221 while (activeSenders > 0)
223 auto start_loop = std::chrono::steady_clock::now();
224 TLOG_ARB(7,
"TransferTest") <<
"do_receiving: Counter is " << counter <<
", calling recvFragment" << TLOG_ENDL;
// init wait = time from the top of the iteration to just before the receive call.
226 auto before_receive = std::chrono::steady_clock::now();
227 init_wait_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(before_receive - start_loop).count();
// senderSlot is passed to recvFragment and afterwards printed as the originating sender.
229 auto ignoreFragPtr = receiver.recvFragment(senderSlot);
230 auto after_receive = std::chrono::steady_clock::now();
// NOTE(review): ignoreFragPtr is dereferenced here with no null check visible in this listing —
// confirm recvFragment cannot return an empty pointer (e.g. on timeout) or that a check was elided.
234 if (ignoreFragPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
236 std::cout <<
"Receiver " << my_rank <<
" received EndOfData Fragment from Sender " << senderSlot << std::endl;
// Resets the member start_time_; the condition under which this happens is not visible here.
243 start_time_ = std::chrono::steady_clock::now();
247 TLOG_INFO(
"TransferTest") <<
"Receiver " << my_rank <<
" received fragment " << receives_each_receiver_ - counter
248 <<
" with seqID " << std::to_string(ignoreFragPtr->sequenceID()) <<
" from Sender " << senderSlot <<
" (Expecting " << counter <<
" more)" << TLOG_ENDL;
// Size of this fragment in bytes: size() (in RawDataType words) times the word size.
249 thisSize = ignoreFragPtr->size() *
sizeof(artdaq::RawDataType);
250 totalSize += thisSize;
// Check the payload against the pattern written by the sender: word ii must equal sequenceID + ii + 1.
253 for (
size_t ii = 0; ii < ignoreFragPtr->dataSize(); ++ii)
255 if (*(ignoreFragPtr->dataBegin() + ii) != ignoreFragPtr->sequenceID() + ii + 1)
257 TLOG_ERROR(
"TransferTest") <<
"Output Data corruption detected! (" << std::to_string(*(ignoreFragPtr->dataBegin() + ii)) <<
" != " << std::to_string(ignoreFragPtr->sequenceID() + ii + 1) <<
" at position " << ii <<
") Aborting!" << TLOG_ENDL;
// input wait = time from the end of the previous iteration until this receive returned.
263 input_wait_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_receive - end_loop).count();
265 TLOG_ARB(7,
"TransferTest") <<
"do_receiving: Recv Loop end, counter is " << counter << TLOG_ENDL;
// Only the recvFragment call itself is counted as receive time.
266 auto total_recv_time = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_receive - before_receive).count();
267 recv_time_metric += total_recv_time;
268 totalTime += total_recv_time;
269 recv_size_metric += thisSize;
// Publish the accumulated values and reset the accumulators when counter hits a reporting boundary.
271 if (metricMan && counter % metric_send_interval == 0)
273 metricMan->sendMetric(
"input_wait", input_wait_metric,
"seconds", 3, MetricMode::Accumulate);
274 metricMan->sendMetric(
"recv_init_time", init_wait_metric,
"seconds", 3, MetricMode::Accumulate);
275 metricMan->sendMetric(
"total_recv_time", recv_time_metric,
"seconds", 3, MetricMode::Accumulate);
// NOTE(review): recv_time_metric is the divisor; if the measured time is 0 the reported rate is inf/NaN.
276 metricMan->sendMetric(
"recv_rate", recv_size_metric / recv_time_metric,
"B/s", 3, MetricMode::Average);
278 input_wait_metric = 0.0;
279 init_wait_metric = 0.0;
280 recv_time_metric = 0.0;
281 recv_size_metric = 0.0;
283 end_loop = std::chrono::steady_clock::now();
// Result: {bytes received, time accumulated inside recvFragment}.
286 return std::make_pair(totalSize, totalTime);
int runTest()
Run the test as configured.
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
Receives Fragment objects from one or more DataSenderManager instances using TransferInterface plugin...
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
TransferTest(fhicl::ParameterSet psi, uint32_t key)
TransferTest Constructor.