00001 #include "proto/TransferTest.hh"
00002
00003 #include "artdaq-core/Data/Fragment.hh"
00004 #include "proto/FragmentReceiverManager.hh"
00005 #include "artdaq/DAQrate/DataSenderManager.hh"
00006
00007 #include "artdaq/DAQdata/Globals.hh"
00008
00009 #include "fhiclcpp/make_ParameterSet.h"
00010
00011 artdaq::TransferTest::TransferTest(fhicl::ParameterSet psi, uint32_t key)
00012 : senders_(psi.get<int>("num_senders"))
00013 , receivers_(psi.get<int>("num_receivers"))
00014 , sends_each_sender_(psi.get<int>("sends_per_sender"))
00015 , receives_each_receiver_(senders_ * sends_each_sender_ / receivers_)
00016 , buffer_count_(psi.get<int>("buffer_count", 10))
00017 , max_payload_size_(psi.get<size_t>("fragment_size", 0x100000))
00018 , ps_()
00019 , validate_mode_(psi.get<bool>("validate_data_mode", false))
00020 {
00021 TLOG_ARB(10, "TransferTest") << "CONSTRUCTOR" << TLOG_ENDL;
00022 metricMan = &metricMan_;
00023
00024 fhicl::ParameterSet metric_pset;
00025
00026 try
00027 {
00028 metric_pset = psi.get<fhicl::ParameterSet>("metrics");
00029 }
00030 catch (...) {}
00031
00032 try
00033 {
00034 std::string name = "TransferTest" + std::to_string(my_rank);
00035 metricMan_.initialize(metric_pset, name);
00036 metricMan_.do_start();
00037 }
00038 catch (...) {}
00039
00040 std::string type(psi.get<std::string>("transfer_plugin_type", "Shmem"));
00041
00042 if (receivers_ > 0)
00043 {
00044 if (senders_ * sends_each_sender_ % receivers_ != 0)
00045 {
00046 std::cout << "Adding sends so that sends_each_sender * num_sending_ranks is a multiple of num_receiving_ranks" << std::endl;
00047 while (senders_ * sends_each_sender_ % receivers_ != 0)
00048 {
00049 sends_each_sender_++;
00050 }
00051 receives_each_receiver_ = senders_ * sends_each_sender_ / receivers_;
00052 std::cout << "sends_each_sender is now " << sends_each_sender_ << std::endl;
00053 psi.put_or_replace("sends_per_sender", sends_each_sender_);
00054 }
00055 }
00056
00057 std::string hostmap = "";
00058 if (psi.has_key("hostmap"))
00059 {
00060 auto masterPortOffset = (key % (36728 - 10240)) + 1024;
00061 hostmap = " host_map: @local::hostmap master_port_offset: " + std::to_string(masterPortOffset);
00062 }
00063
00064 std::stringstream ss;
00065 ss << psi.to_string();
00066 ss << " sources: {";
00067 for (int ii = 0; ii < senders_; ++ii)
00068 {
00069 ss << "s" << ii << ": { transferPluginType: " << type << " source_rank: " << ii << " max_fragment_size_words: " << max_payload_size_ << " buffer_count: " << buffer_count_ << " shm_key_offset: " << std::to_string(key) << hostmap << "}";
00070 }
00071 ss << "} destinations: {";
00072 for (int jj = senders_; jj < senders_ + receivers_; ++jj)
00073 {
00074 ss << "d" << jj << ": { transferPluginType: " << type << " destination_rank: " << jj << " max_fragment_size_words: " << max_payload_size_ << " buffer_count: " << buffer_count_ << " shm_key_offset: " << std::to_string(key) << hostmap << "}";
00075 }
00076 ss << "}";
00077
00078 make_ParameterSet(ss.str(), ps_);
00079
00080
00081 std::cout << "Going to configure with ParameterSet: " << ps_.to_string() << std::endl;
00082 }
00083
00084 int artdaq::TransferTest::runTest()
00085 {
00086 TLOG_ARB(11, "TransferTest") << "runTest BEGIN" << TLOG_ENDL;
00087 start_time_ = std::chrono::steady_clock::now();
00088 std::pair<size_t, double> result;
00089 if (my_rank >= senders_ + receivers_) return 0;
00090 if (my_rank < senders_)
00091 {
00092 result = do_sending();
00093 }
00094 else
00095 {
00096 result = do_receiving();
00097 }
00098 auto duration = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(std::chrono::steady_clock::now() - start_time_).count();
00099 std::cout << (my_rank < senders_ ? "Sent " : "Received ") << result.first << " bytes in " << duration << " seconds ( " << formatBytes(result.first / duration) << "/s )." << std::endl;
00100 std::cout << "Rate of " << (my_rank < senders_ ? "sending" : "receiving") << ": " << formatBytes(result.first / result.second) << "/s." << std::endl;
00101 metricMan_.do_stop();
00102 metricMan_.shutdown();
00103 TLOG_ARB(11, "TransferTest") << "runTest DONE" << TLOG_ENDL;
00104 return 0;
00105 }
00106
00107 std::pair<size_t, double> artdaq::TransferTest::do_sending()
00108 {
00109 TLOG_ARB(7, "TransferTest") << "do_sending entered RawFragmentHeader::num_words()=" << std::to_string(artdaq::detail::RawFragmentHeader::num_words()) << TLOG_ENDL;
00110
00111 size_t totalSize = 0;
00112 double totalTime = 0;
00113 artdaq::DataSenderManager sender(ps_);
00114
00115 unsigned data_size_wrds = max_payload_size_ / sizeof(artdaq::RawDataType) - artdaq::detail::RawFragmentHeader::num_words();
00116 if (data_size_wrds < 8) data_size_wrds = 8;
00117 artdaq::Fragment frag(data_size_wrds);
00118
00119 if (validate_mode_)
00120 {
00121 artdaq::RawDataType gen_seed = 0;
00122
00123 std::generate_n(frag.dataBegin(), data_size_wrds, [&]() { return ++gen_seed; });
00124 for (size_t ii = 0; ii < frag.dataSize(); ++ii)
00125 {
00126 if (*(frag.dataBegin() + ii) != ii + 1)
00127 {
00128 TLOG_ERROR("TransferTest") << "Data corruption detected! (" << std::to_string(*(frag.dataBegin() + ii)) << " != " << std::to_string(ii + 1) << ") Aborting!" << TLOG_ENDL;
00129 exit(1);
00130 }
00131 }
00132 }
00133
00134 int metric_send_interval = sends_each_sender_ / 1000 > 1 ? sends_each_sender_ / 1000 : 1;
00135 auto init_time_metric = 0.0;
00136 auto send_time_metric = 0.0;
00137 auto after_time_metric = 0.0;
00138 auto send_size_metric = 0.0;
00139
00140 for (int ii = 0; ii < sends_each_sender_; ++ii)
00141 {
00142 auto loop_start = std::chrono::steady_clock::now();
00143 TLOG_ARB(7, "TransferTest") << "sender rank " << my_rank << " #" << ii << " resized bytes=" << std::to_string(frag.sizeBytes()) << TLOG_ENDL;
00144 totalSize += frag.sizeBytes();
00145
00146
00147 frag.setSequenceID(ii);
00148 frag.setFragmentID(my_rank);
00149 frag.setSystemType(artdaq::Fragment::DataFragmentType);
00150
00151
00152
00153
00154
00155
00156 auto send_start = std::chrono::steady_clock::now();
00157 sender.sendFragment(std::move(frag));
00158 auto after_send = std::chrono::steady_clock::now();
00159 TLOG_TRACE("TransferTest") << "Sender " << my_rank << " sent fragment " << ii << TLOG_ENDL;
00160
00161
00162 frag = artdaq::Fragment(data_size_wrds);
00163 if (validate_mode_)
00164 {
00165 artdaq::RawDataType gen_seed = ii + 1;
00166
00167 std::generate_n(frag.dataBegin(), data_size_wrds, [&]() { return ++gen_seed; });
00168 for (size_t jj = 0; jj < frag.dataSize(); ++jj)
00169 {
00170 if (*(frag.dataBegin() + jj) != (ii + 1) + jj + 1)
00171 {
00172 TLOG_ERROR("TransferTest") << "Input Data corruption detected! (" << std::to_string(*(frag.dataBegin() + jj)) << " != " << std::to_string(ii + jj + 2) << " at position " << ii << ") Aborting!" << TLOG_ENDL;
00173 exit(1);
00174 }
00175 }
00176 }
00177 TLOG_ARB(9, "TransferTest") << "sender rank " << my_rank << " frag replaced" << TLOG_ENDL;
00178
00179 auto total_send_time = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_send - send_start).count();
00180 totalTime += total_send_time;
00181 send_time_metric += total_send_time;
00182 send_size_metric += data_size_wrds * sizeof(artdaq::RawDataType);
00183 after_time_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(std::chrono::steady_clock::now() - after_send).count();
00184 init_time_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(send_start - loop_start).count();
00185
00186 if (metricMan && ii % metric_send_interval == 0)
00187 {
00188 metricMan->sendMetric("send_init_time", init_time_metric, "seconds", 3, MetricMode::Accumulate);
00189 metricMan->sendMetric("total_send_time", send_time_metric, "seconds", 3, MetricMode::Accumulate);
00190 metricMan->sendMetric("after_send_time", after_time_metric, "seconds", 3, MetricMode::Accumulate);
00191 metricMan->sendMetric("send_rate", send_size_metric / send_time_metric, "B/s", 3, MetricMode::Average);
00192 init_time_metric = 0.0;
00193 send_time_metric = 0.0;
00194 after_time_metric = 0.0;
00195 send_size_metric = 0.0;
00196 }
00197 }
00198
00199 return std::make_pair(totalSize, totalTime);
00200 }
00201
00202 std::pair<size_t, double> artdaq::TransferTest::do_receiving()
00203 {
00204 TLOG_ARB(7, "TransferTest") << "do_receiving entered" << TLOG_ENDL;
00205
00206 artdaq::FragmentReceiverManager receiver(ps_);
00207 receiver.start_threads();
00208 int counter = receives_each_receiver_;
00209 size_t totalSize = 0;
00210 double totalTime = 0;
00211 bool first = true;
00212 int activeSenders = senders_;
00213 auto end_loop = std::chrono::steady_clock::now();
00214
00215 auto recv_size_metric = 0.0;
00216 auto recv_time_metric = 0.0;
00217 auto input_wait_metric = 0.0;
00218 auto init_wait_metric = 0.0;
00219 int metric_send_interval = receives_each_receiver_ / 1000 > 1 ? receives_each_receiver_ : 1;
00220
00221 while (activeSenders > 0)
00222 {
00223 auto start_loop = std::chrono::steady_clock::now();
00224 TLOG_ARB(7, "TransferTest") << "do_receiving: Counter is " << counter << ", calling recvFragment" << TLOG_ENDL;
00225 int senderSlot = artdaq::TransferInterface::RECV_TIMEOUT;
00226 auto before_receive = std::chrono::steady_clock::now();
00227 init_wait_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(before_receive - start_loop).count();
00228
00229 auto ignoreFragPtr = receiver.recvFragment(senderSlot);
00230 auto after_receive = std::chrono::steady_clock::now();
00231 size_t thisSize = 0;
00232 if (senderSlot != artdaq::TransferInterface::RECV_TIMEOUT && ignoreFragPtr)
00233 {
00234 if (ignoreFragPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
00235 {
00236 std::cout << "Receiver " << my_rank << " received EndOfData Fragment from Sender " << senderSlot << std::endl;
00237 activeSenders--;
00238 }
00239 else
00240 {
00241 if (first)
00242 {
00243 start_time_ = std::chrono::steady_clock::now();
00244 first = false;
00245 }
00246 counter--;
00247 TLOG_INFO("TransferTest") << "Receiver " << my_rank << " received fragment " << receives_each_receiver_ - counter
00248 << " with seqID " << std::to_string(ignoreFragPtr->sequenceID()) << " from Sender " << senderSlot << " (Expecting " << counter << " more)" << TLOG_ENDL;
00249 thisSize = ignoreFragPtr->size() * sizeof(artdaq::RawDataType);
00250 totalSize += thisSize;
00251 if (validate_mode_)
00252 {
00253 for (size_t ii = 0; ii < ignoreFragPtr->dataSize(); ++ii)
00254 {
00255 if (*(ignoreFragPtr->dataBegin() + ii) != ignoreFragPtr->sequenceID() + ii + 1)
00256 {
00257 TLOG_ERROR("TransferTest") << "Output Data corruption detected! (" << std::to_string(*(ignoreFragPtr->dataBegin() + ii)) << " != " << std::to_string(ignoreFragPtr->sequenceID() + ii + 1) << " at position " << ii << ") Aborting!" << TLOG_ENDL;
00258 exit(1);
00259 }
00260 }
00261 }
00262 }
00263 input_wait_metric += std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_receive - end_loop).count();
00264 }
00265 TLOG_ARB(7, "TransferTest") << "do_receiving: Recv Loop end, counter is " << counter << TLOG_ENDL;
00266 auto total_recv_time = std::chrono::duration_cast<artdaq::TimeUtils::seconds>(after_receive - before_receive).count();
00267 recv_time_metric += total_recv_time;
00268 totalTime += total_recv_time;
00269 recv_size_metric += thisSize;
00270
00271 if (metricMan && counter % metric_send_interval == 0)
00272 {
00273 metricMan->sendMetric("input_wait", input_wait_metric, "seconds", 3, MetricMode::Accumulate);
00274 metricMan->sendMetric("recv_init_time", init_wait_metric, "seconds", 3, MetricMode::Accumulate);
00275 metricMan->sendMetric("total_recv_time", recv_time_metric, "seconds", 3, MetricMode::Accumulate);
00276 metricMan->sendMetric("recv_rate", recv_size_metric / recv_time_metric, "B/s", 3, MetricMode::Average);
00277
00278 input_wait_metric = 0.0;
00279 init_wait_metric = 0.0;
00280 recv_time_metric = 0.0;
00281 recv_size_metric = 0.0;
00282 }
00283 end_loop = std::chrono::steady_clock::now();
00284 }
00285
00286 return std::make_pair(totalSize, totalTime);
00287 }