artdaq  v3_00_01
TransferTest.cc
1 #include "proto/TransferTest.hh"
2 
3 #include "artdaq-core/Data/Fragment.hh"
4 #include "proto/FragmentReceiverManager.hh"
5 #include "artdaq/DAQrate/DataSenderManager.hh"
6 
7 #include "artdaq/DAQdata/Globals.hh"
8 
9 #include "fhiclcpp/make_ParameterSet.h"
10 
11 artdaq::TransferTest::TransferTest(fhicl::ParameterSet psi, uint32_t key)
12  : senders_(psi.get<int>("num_senders"))
13  , receivers_(psi.get<int>("num_receivers"))
14  , sends_each_sender_(psi.get<int>("sends_per_sender"))
15  , receives_each_receiver_(senders_ * sends_each_sender_ / receivers_)
16  , buffer_count_(psi.get<int>("buffer_count", 10))
17  , max_payload_size_(psi.get<size_t>("fragment_size", 0x100000))
18  , ps_()
19  , validate_mode_(psi.get<bool>("validate_data_mode", false))
20 {
21  TLOG_ARB(10, "TransferTest") << "CONSTRUCTOR" << TLOG_ENDL;
22  metricMan = &metricMan_;
23 
24  fhicl::ParameterSet metric_pset;
25 
26  try
27  {
28  metric_pset = psi.get<fhicl::ParameterSet>("metrics");
29  }
30  catch (...) {} // OK if there's no metrics table defined in the FHiCL
31 
32  try
33  {
34  std::string name = "TransferTest" + std::to_string(my_rank);
35  metricMan_.initialize(metric_pset, name);
36  metricMan_.do_start();
37  }
38  catch (...) {}
39 
40  std::string type(psi.get<std::string>("transfer_plugin_type", "Shmem"));
41 
42  if (receivers_ > 0)
43  {
44  if (senders_ * sends_each_sender_ % receivers_ != 0)
45  {
46  std::cout << "Adding sends so that sends_each_sender * num_sending_ranks is a multiple of num_receiving_ranks" << std::endl;
47  while (senders_ * sends_each_sender_ % receivers_ != 0)
48  {
49  sends_each_sender_++;
50  }
51  receives_each_receiver_ = senders_ * sends_each_sender_ / receivers_;
52  std::cout << "sends_each_sender is now " << sends_each_sender_ << std::endl;
53  psi.put_or_replace("sends_per_sender", sends_each_sender_);
54  }
55  }
56 
57  std::string hostmap = "";
58  if (psi.has_key("hostmap"))
59  {
60  auto masterPortOffset = (key % (36728 - 10240)) + 1024; // Leave lots of headroom
61  hostmap = " host_map: @local::hostmap master_port_offset: " + std::to_string(masterPortOffset);
62  }
63 
64  std::stringstream ss;
65  ss << psi.to_string();
66  ss << " sources: {";
67  for (int ii = 0; ii < senders_; ++ii)
68  {
69  ss << "s" << ii << ": { transferPluginType: " << type << " source_rank: " << ii << " max_fragment_size_words: " << max_payload_size_ << " buffer_count: " << buffer_count_ << " shm_key_offset: " << std::to_string(key) << hostmap << "}";
70  }
71  ss << "} destinations: {";
72  for (int jj = senders_; jj < senders_ + receivers_; ++jj)
73  {
74  ss << "d" << jj << ": { transferPluginType: " << type << " destination_rank: " << jj << " max_fragment_size_words: " << max_payload_size_ << " buffer_count: " << buffer_count_ << " shm_key_offset: " << std::to_string(key) << hostmap << "}";
75  }
76  ss << "}";
77 
78  make_ParameterSet(ss.str(), ps_);
79 
80 
81  std::cout << "Going to configure with ParameterSet: " << ps_.to_string() << std::endl;
82 }
83 
85 {
86  TLOG_ARB(11, "TransferTest") << "runTest BEGIN" << TLOG_ENDL;
87  start_time_ = std::chrono::steady_clock::now();
88  std::pair<size_t, double> result;
89  if (my_rank >= senders_ + receivers_) return 0;
90  if (my_rank < senders_)
91  {
92  result = do_sending();
93  }
94  else
95  {
96  result = do_receiving();
97  }
98  auto duration = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(std::chrono::steady_clock::now() - start_time_).count();
99  std::cout << (my_rank < senders_ ? "Sent " : "Received ") << result.first << " bytes in " << duration << " seconds ( " << formatBytes(result.first / duration) << "/s )." << std::endl;
100  std::cout << "Rate of " << (my_rank < senders_ ? "sending" : "receiving") << ": " << formatBytes(result.first / result.second) << "/s." << std::endl;
101  metricMan_.do_stop();
102  metricMan_.shutdown();
103  TLOG_ARB(11, "TransferTest") << "runTest DONE" << TLOG_ENDL;
104  return 0;
105 }
106 
107 std::pair<size_t, double> artdaq::TransferTest::do_sending()
108 {
109  TLOG_ARB(7, "TransferTest") << "do_sending entered RawFragmentHeader::num_words()=" << std::to_string(artdaq::detail::RawFragmentHeader::num_words()) << TLOG_ENDL;
110 
111  size_t totalSize = 0;
112  double totalTime = 0;
113  artdaq::DataSenderManager sender(ps_);
114 
115  unsigned data_size_wrds = max_payload_size_ / sizeof(artdaq::RawDataType) - artdaq::detail::RawFragmentHeader::num_words();
116  if (data_size_wrds < 8) data_size_wrds = 8; // min size
117  artdaq::Fragment frag(data_size_wrds);
118 
119  if (validate_mode_)
120  {
121  artdaq::RawDataType gen_seed = 0;
122 
123  std::generate_n(frag.dataBegin(), data_size_wrds, [&]() { return ++gen_seed; });
124  for (size_t ii = 0; ii < frag.dataSize(); ++ii)
125  {
126  if (*(frag.dataBegin() + ii) != ii + 1)
127  {
128  TLOG_ERROR("TransferTest") << "Data corruption detected! (" << std::to_string(*(frag.dataBegin() + ii)) << " != " << std::to_string(ii + 1) << ") Aborting!" << TLOG_ENDL;
129  exit(1);
130  }
131  }
132  }
133 
134  int metric_send_interval = sends_each_sender_ / 1000 > 1 ? sends_each_sender_ / 1000 : 1;
135  auto init_time_metric = 0.0;
136  auto send_time_metric = 0.0;
137  auto after_time_metric = 0.0;
138  auto send_size_metric = 0.0;
139 
140  for (int ii = 0; ii < sends_each_sender_; ++ii)
141  {
142  auto loop_start = std::chrono::steady_clock::now();
143  TLOG_ARB(7, "TransferTest") << "sender rank " << my_rank << " #" << ii << " resized bytes=" << std::to_string(frag.sizeBytes()) << TLOG_ENDL;
144  totalSize += frag.sizeBytes();
145 
146  //unsigned sndDatSz = data_size_wrds;
147  frag.setSequenceID(ii);
148  frag.setFragmentID(my_rank);
149  frag.setSystemType(artdaq::Fragment::DataFragmentType);
150  /*
151  artdaq::Fragment::iterator it = frag.dataBegin();
152  *it = my_rank;
153  *++it = ii;
154  *++it = sndDatSz;*/
155 
156  auto send_start = std::chrono::steady_clock::now();
157  sender.sendFragment(std::move(frag));
158  auto after_send = std::chrono::steady_clock::now();
159  TLOG_TRACE("TransferTest") << "Sender " << my_rank << " sent fragment " << ii << TLOG_ENDL;
160  //usleep( (data_size_wrds*sizeof(artdaq::RawDataType))/233 );
161 
162  frag = artdaq::Fragment(data_size_wrds); // replace/renew
163  if (validate_mode_)
164  {
165  artdaq::RawDataType gen_seed = ii + 1;
166 
167  std::generate_n(frag.dataBegin(), data_size_wrds, [&]() { return ++gen_seed; });
168  for (size_t jj = 0; jj < frag.dataSize(); ++jj)
169  {
170  if (*(frag.dataBegin() + jj) != (ii + 1) + jj + 1)
171  {
172  TLOG_ERROR("TransferTest") << "Input Data corruption detected! (" << std::to_string(*(frag.dataBegin() + jj)) << " != " << std::to_string(ii + jj + 2) << " at position " << ii << ") Aborting!" << TLOG_ENDL;
173  exit(1);
174  }
175  }
176  }
177  TLOG_ARB(9, "TransferTest") << "sender rank " << my_rank << " frag replaced" << TLOG_ENDL;
178 
179  auto total_send_time = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_send - send_start).count();
180  totalTime += total_send_time;
181  send_time_metric += total_send_time;
182  send_size_metric += data_size_wrds * sizeof(artdaq::RawDataType);
183  after_time_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(std::chrono::steady_clock::now() - after_send).count();
184  init_time_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(send_start - loop_start).count();
185 
186  if (metricMan && ii % metric_send_interval == 0)
187  {
188  metricMan->sendMetric("send_init_time", init_time_metric, "seconds", 3, MetricMode::Accumulate);
189  metricMan->sendMetric("total_send_time", send_time_metric, "seconds", 3, MetricMode::Accumulate);
190  metricMan->sendMetric("after_send_time", after_time_metric, "seconds", 3, MetricMode::Accumulate);
191  metricMan->sendMetric("send_rate", send_size_metric / send_time_metric, "B/s", 3, MetricMode::Average);
192  init_time_metric = 0.0;
193  send_time_metric = 0.0;
194  after_time_metric = 0.0;
195  send_size_metric = 0.0;
196  }
197  }
198 
199  return std::make_pair(totalSize, totalTime);
200 } // do_sending
201 
202 std::pair<size_t, double> artdaq::TransferTest::do_receiving()
203 {
204  TLOG_ARB(7, "TransferTest") << "do_receiving entered" << TLOG_ENDL;
205 
206  artdaq::FragmentReceiverManager receiver(ps_);
207  receiver.start_threads();
208  int counter = receives_each_receiver_;
209  size_t totalSize = 0;
210  double totalTime = 0;
211  bool first = true;
212  int activeSenders = senders_;
213  auto end_loop = std::chrono::steady_clock::now();
214 
215  auto recv_size_metric = 0.0;
216  auto recv_time_metric = 0.0;
217  auto input_wait_metric = 0.0;
218  auto init_wait_metric = 0.0;
219  int metric_send_interval = receives_each_receiver_ / 1000 > 1 ? receives_each_receiver_ : 1;
220 
221  while (activeSenders > 0)
222  {
223  auto start_loop = std::chrono::steady_clock::now();
224  TLOG_ARB(7, "TransferTest") << "do_receiving: Counter is " << counter << ", calling recvFragment" << TLOG_ENDL;
226  auto before_receive = std::chrono::steady_clock::now();
227  init_wait_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(before_receive - start_loop).count();
228 
229  auto ignoreFragPtr = receiver.recvFragment(senderSlot);
230  auto after_receive = std::chrono::steady_clock::now();
231  size_t thisSize = 0;
232  if (senderSlot != artdaq::TransferInterface::RECV_TIMEOUT && ignoreFragPtr)
233  {
234  if (ignoreFragPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
235  {
236  std::cout << "Receiver " << my_rank << " received EndOfData Fragment from Sender " << senderSlot << std::endl;
237  activeSenders--;
238  }
239  else
240  {
241  if (first)
242  {
243  start_time_ = std::chrono::steady_clock::now();
244  first = false;
245  }
246  counter--;
247  TLOG_INFO("TransferTest") << "Receiver " << my_rank << " received fragment " << receives_each_receiver_ - counter
248  << " with seqID " << std::to_string(ignoreFragPtr->sequenceID()) << " from Sender " << senderSlot << " (Expecting " << counter << " more)" << TLOG_ENDL;
249  thisSize = ignoreFragPtr->size() * sizeof(artdaq::RawDataType);
250  totalSize += thisSize;
251  if (validate_mode_)
252  {
253  for (size_t ii = 0; ii < ignoreFragPtr->dataSize(); ++ii)
254  {
255  if (*(ignoreFragPtr->dataBegin() + ii) != ignoreFragPtr->sequenceID() + ii + 1)
256  {
257  TLOG_ERROR("TransferTest") << "Output Data corruption detected! (" << std::to_string(*(ignoreFragPtr->dataBegin() + ii)) << " != " << std::to_string(ignoreFragPtr->sequenceID() + ii + 1) << " at position " << ii << ") Aborting!" << TLOG_ENDL;
258  exit(1);
259  }
260  }
261  }
262  }
263  input_wait_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_receive - end_loop).count();
264  }
265  TLOG_ARB(7, "TransferTest") << "do_receiving: Recv Loop end, counter is " << counter << TLOG_ENDL;
266  auto total_recv_time = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_receive - before_receive).count();
267  recv_time_metric += total_recv_time;
268  totalTime += total_recv_time;
269  recv_size_metric += thisSize;
270 
271  if (metricMan && counter % metric_send_interval == 0)
272  {
273  metricMan->sendMetric("input_wait", input_wait_metric, "seconds", 3, MetricMode::Accumulate);
274  metricMan->sendMetric("recv_init_time", init_wait_metric, "seconds", 3, MetricMode::Accumulate);
275  metricMan->sendMetric("total_recv_time", recv_time_metric, "seconds", 3, MetricMode::Accumulate);
276  metricMan->sendMetric("recv_rate", recv_size_metric / recv_time_metric, "B/s", 3, MetricMode::Average);
277 
278  input_wait_metric = 0.0;
279  init_wait_metric = 0.0;
280  recv_time_metric = 0.0;
281  recv_size_metric = 0.0;
282  }
283  end_loop = std::chrono::steady_clock::now();
284  }
285 
286  return std::make_pair(totalSize, totalTime);
287 }
int runTest()
Run the test as configured.
Definition: TransferTest.cc:84
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
Receives Fragment objects from one or more DataSenderManager instances using TransferInterface plugin...
static const int RECV_TIMEOUT
Value to be returned upon receive timeout. Because receivers otherwise return rank, this is also the limit on the number of ranks that artdaq currently supports.
TransferTest(fhicl::ParameterSet psi, uint32_t key)
TransferTest Constructor.
Definition: TransferTest.cc:11