00001 #include "proto/TransferTest.hh"
00002
00003 #include "artdaq-core/Data/Fragment.hh"
00004 #include "artdaq/DAQrate/DataReceiverManager.hh"
00005 #include "artdaq/DAQrate/DataSenderManager.hh"
00006
00007 #include "artdaq/DAQdata/Globals.hh"
00008
00009 #include "fhiclcpp/make_ParameterSet.h"
00010
00011 #define TRACE_NAME "TransferTest"
00012
00013 artdaq::TransferTest::TransferTest(fhicl::ParameterSet psi)
00014 : senders_(psi.get<int>("num_senders"))
00015 , receivers_(psi.get<int>("num_receivers"))
00016 , sends_each_sender_(psi.get<int>("sends_per_sender"))
00017 , receives_each_receiver_(senders_ * sends_each_sender_ / receivers_)
00018 , buffer_count_(psi.get<int>("buffer_count", 10))
00019 , max_payload_size_(psi.get<size_t>("fragment_size", 0x100000))
00020 , ps_()
00021 , validate_mode_(psi.get<bool>("validate_data_mode", false))
00022 {
00023 TRACE(10, "TransferTest CONSTRUCTOR");
00024 metricMan = &metricMan_;
00025
00026 fhicl::ParameterSet metric_pset;
00027
00028 try
00029 {
00030 metric_pset = psi.get<fhicl::ParameterSet>("metrics");
00031 }
00032 catch (...) {}
00033
00034 try
00035 {
00036 std::string name = "TransferTest" + std::to_string(my_rank);
00037 metricMan_.initialize(metric_pset, name);
00038 metricMan_.do_start();
00039 }
00040 catch (...) {}
00041
00042 std::string type(psi.get<std::string>("transfer_plugin_type", "Shmem"));
00043
00044 if (receivers_ > 0)
00045 {
00046 if (senders_ * sends_each_sender_ % receivers_ != 0)
00047 {
00048 std::cout << "Adding sends so that sends_each_sender * num_sending_ranks is a multiple of num_receiving_ranks" << std::endl;
00049 while (senders_ * sends_each_sender_ % receivers_ != 0)
00050 {
00051 sends_each_sender_++;
00052 }
00053 receives_each_receiver_ = senders_ * sends_each_sender_ / receivers_;
00054 std::cout << "sends_each_sender is now " << sends_each_sender_ << std::endl;
00055 psi.put_or_replace("sends_per_sender", sends_each_sender_);
00056 }
00057 }
00058
00059 std::string hostmap = "";
00060 if (psi.has_key("hostmap"))
00061 {
00062 hostmap = " host_map: @local::hostmap";
00063 }
00064
00065 std::stringstream ss;
00066 ss << psi.to_string();
00067 ss << " sources: {";
00068 for (int ii = 0; ii < senders_; ++ii)
00069 {
00070 ss << "s" << ii << ": { transferPluginType: " << type << " source_rank: " << ii << " max_fragment_size_words: " << max_payload_size_ << " buffer_count: " << buffer_count_ << hostmap << "}";
00071 }
00072 ss << "} destinations: {";
00073 for (int jj = senders_; jj < senders_ + receivers_; ++jj)
00074 {
00075 ss << "d" << jj << ": { transferPluginType: " << type << " destination_rank: " << jj << " max_fragment_size_words: " << max_payload_size_ << " buffer_count: " << buffer_count_ << hostmap << "}";
00076 }
00077 ss << "}";
00078
00079 make_ParameterSet(ss.str(), ps_);
00080
00081
00082 std::cout << "Going to configure with ParameterSet: " << ps_.to_string() << std::endl;
00083 }
00084
00085 int artdaq::TransferTest::runTest()
00086 {
00087 TRACE(11, "TransferTest::runTest BEGIN");
00088 start_time_ = std::chrono::steady_clock::now();
00089 std::pair<size_t, double> result;
00090 if (my_rank < senders_)
00091 {
00092 result = do_sending();
00093 }
00094 else
00095 {
00096 result = do_receiving();
00097 }
00098 auto duration = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time_).count();
00099 std::cout << (my_rank < senders_ ? "Sent " : "Received ") << result.first << " bytes in " << duration << " seconds ( " << formatBytes(result.first / duration) << "/s )." << std::endl;
00100 std::cout << "Rate of " << (my_rank < senders_ ? "sending" : "receiving") << ": " << formatBytes(result.first / result.second) << "/s." << std::endl;
00101 metricMan_.do_stop();
00102 metricMan_.shutdown();
00103 TRACE(11, "TransferTest::runTest DONE");
00104 return 0;
00105 }
00106
00107 std::pair<size_t, double> artdaq::TransferTest::do_sending()
00108 {
00109 TRACE(7, "do_sending entered RawFragmentHeader::num_words()=%lu"
00110 , artdaq::detail::RawFragmentHeader::num_words());
00111
00112 size_t totalSize = 0;
00113 double totalTime = 0;
00114 artdaq::DataSenderManager sender(ps_);
00115
00116 unsigned data_size_wrds = max_payload_size_ / sizeof(artdaq::RawDataType) - artdaq::detail::RawFragmentHeader::num_words();
00117 if (data_size_wrds < 8) data_size_wrds = 8;
00118 artdaq::Fragment frag(data_size_wrds);
00119
00120 if (validate_mode_)
00121 {
00122 artdaq::RawDataType gen_seed = 0;
00123
00124 std::generate_n(frag.dataBegin(), data_size_wrds, [&]() { return ++gen_seed; });
00125 for (size_t ii = 0; ii < frag.dataSize(); ++ii)
00126 {
00127 if (*(frag.dataBegin() + ii) != ii + 1)
00128 {
00129 TLOG_ERROR("TransferTest") << "Data corruption detected! (" << std::to_string(*(frag.dataBegin() + ii)) << " != " << std::to_string(ii + 1) << ") Aborting!" << TLOG_ENDL;
00130 exit(1);
00131 }
00132 }
00133 }
00134
00135 for (int ii = 0; ii < sends_each_sender_; ++ii)
00136 {
00137 auto loop_start = std::chrono::steady_clock::now();
00138 TRACE(7, "sender rank %d #%u resized bytes=%ld", my_rank, ii, frag.sizeBytes());
00139 totalSize += frag.sizeBytes();
00140
00141
00142 frag.setSequenceID(ii);
00143 frag.setFragmentID(my_rank);
00144 frag.setSystemType(artdaq::Fragment::DataFragmentType);
00145
00146
00147
00148
00149
00150
00151 auto send_start = std::chrono::steady_clock::now();
00152 sender.sendFragment(std::move(frag));
00153 auto after_send = std::chrono::steady_clock::now();
00154 TRACE(1, "Sender %d sent fragment %d", my_rank, ii);
00155
00156
00157 frag = artdaq::Fragment(data_size_wrds);
00158 if (validate_mode_)
00159 {
00160 artdaq::RawDataType gen_seed = 0;
00161
00162 std::generate_n(frag.dataBegin(), data_size_wrds, [&]() { return ++gen_seed; });
00163 for (size_t ii = 0; ii < frag.dataSize(); ++ii)
00164 {
00165 if (*(frag.dataBegin() + ii) != ii + 1)
00166 {
00167 TLOG_ERROR("TransferTest") << "Data corruption detected! (" << std::to_string(*(frag.dataBegin() + ii)) << " != " << std::to_string(ii + 1) << ") Aborting!" << TLOG_ENDL;
00168 exit(1);
00169 }
00170 }
00171 }
00172 TRACE(9, "sender rank %d frag replaced", my_rank);
00173
00174 auto total_send_time = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(after_send - send_start).count();
00175 totalTime += total_send_time;
00176 if (metricMan && ii % 100 == 0)
00177 {
00178 metricMan->sendMetric("send_init_time", std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(send_start - loop_start).count(), "seconds", 3);
00179 metricMan->sendMetric("total_send_time", total_send_time, "seconds", 3);
00180 metricMan->sendMetric("after_send_time", std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - after_send).count(), "seconds", 3);
00181 metricMan->sendMetric("send_rate", data_size_wrds * sizeof(artdaq::RawDataType) / total_send_time, "B/s", 3);
00182 }
00183 }
00184
00185 return std::make_pair(totalSize, totalTime);
00186 }
00187
00188 std::pair<size_t, double> artdaq::TransferTest::do_receiving()
00189 {
00190 TRACE(7, "do_receiving entered");
00191 artdaq::DataReceiverManager receiver(ps_);
00192 receiver.start_threads();
00193 int counter = receives_each_receiver_;
00194 size_t totalSize = 0;
00195 double totalTime = 0;
00196 bool first = true;
00197 int activeSenders = senders_;
00198 auto end_loop = std::chrono::steady_clock::now();
00199
00200 while (activeSenders > 0)
00201 {
00202 auto start_loop = std::chrono::steady_clock::now();
00203 TRACE(7, "TransferTest::do_receiving: Counter is %d, calling recvFragment", counter);
00204 int senderSlot = artdaq::TransferInterface::RECV_TIMEOUT;
00205 auto before_receive = std::chrono::steady_clock::now();
00206 auto ignoreFragPtr = receiver.recvFragment(senderSlot);
00207 auto after_receive = std::chrono::steady_clock::now();
00208 size_t thisSize = 0;
00209 if (senderSlot != artdaq::TransferInterface::RECV_TIMEOUT && ignoreFragPtr)
00210 {
00211 if (ignoreFragPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
00212 {
00213 std::cout << "Receiver " << my_rank << " received EndOfData Fragment from Sender " << senderSlot << std::endl;
00214 activeSenders--;
00215 }
00216 else
00217 {
00218 if (first)
00219 {
00220 start_time_ = std::chrono::steady_clock::now();
00221 first = false;
00222 }
00223 counter--;
00224 TRACE(1, "Receiver %d received fragment %d with seqID %lu from Sender %d (Expecting %d more)"
00225 , my_rank, receives_each_receiver_ - counter, ignoreFragPtr->sequenceID(), senderSlot, counter);
00226 thisSize = ignoreFragPtr->size() * sizeof(artdaq::RawDataType);
00227 totalSize += thisSize;
00228 if (validate_mode_)
00229 {
00230 for (size_t ii = 0; ii < ignoreFragPtr->dataSize(); ++ii)
00231 {
00232 if (*(ignoreFragPtr->dataBegin() + ii) != ii + 1)
00233 {
00234 TLOG_ERROR("TransferTest") << "Data corruption detected! (" << std::to_string(*(ignoreFragPtr->dataBegin() + ii)) << " != " << std::to_string(ii + 1) << ") Aborting!" << TLOG_ENDL;
00235 exit(1);
00236 }
00237 }
00238 }
00239 }
00240 if (metricMan)
00241 {
00242 metricMan->sendMetric("input_wait", std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(after_receive - end_loop).count(), "seconds", 3);
00243 }
00244
00245 }
00246 TRACE(7, "TransferTest::do_receiving: Recv Loop end, counter is %d", counter);
00247 auto total_recv_time = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(after_receive - before_receive).count();
00248 totalTime += total_recv_time;
00249 if (metricMan && counter % 100 == 0)
00250 {
00251 metricMan->sendMetric("recv_init_time", std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(before_receive - start_loop).count(), "seconds", 3);
00252 metricMan->sendMetric("total_recv_time", total_recv_time, "seconds", 3);
00253 metricMan->sendMetric("recv_rate", thisSize / total_recv_time, "B/s", 3);
00254 }
00255 end_loop = std::chrono::steady_clock::now();
00256 }
00257
00258 return std::make_pair(totalSize, totalTime);
00259 }