00001 #include <chrono>
00002
00003 #include "proto/FragmentReceiverManager.hh"
00004 #include "artdaq/DAQdata/Globals.hh"
00005 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
00006 #include "cetlib_except/exception.h"
00007
00008 artdaq::FragmentReceiverManager::FragmentReceiverManager(const fhicl::ParameterSet& pset)
00009 : stop_requested_(false)
00010 , source_threads_()
00011 , source_plugins_()
00012 , enabled_sources_()
00013 , fragment_store_()
00014 , recv_frag_count_()
00015 , recv_frag_size_()
00016 , recv_seq_count_()
00017 , suppress_noisy_senders_(pset.get<bool>("auto_suppression_enabled", true))
00018 , suppression_threshold_(pset.get<size_t>("max_receive_difference", 50))
00019 , receive_timeout_(pset.get<size_t>("receive_timeout_usec", 100000))
00020 {
00021 TLOG_DEBUG("FragmentReceiverManager") << "Constructor" << TLOG_ENDL;
00022 auto enabled_srcs = pset.get<std::vector<int>>("enabled_sources", std::vector<int>());
00023 auto enabled_srcs_empty = enabled_srcs.size() == 0;
00024 if (enabled_srcs_empty)
00025 {
00026 TLOG_INFO("FragmentReceiverManager") << "enabled_sources not specified, assuming all sources enabled." << TLOG_ENDL;
00027 }
00028 else
00029 {
00030 for (auto& s : enabled_srcs)
00031 {
00032 enabled_sources_.insert(s);
00033 }
00034 }
00035
00036 auto srcs = pset.get<fhicl::ParameterSet>("sources", fhicl::ParameterSet());
00037 for (auto& s : srcs.get_pset_names())
00038 {
00039 try
00040 {
00041 auto transfer = std::unique_ptr<TransferInterface>(MakeTransferPlugin(srcs, s,
00042 TransferInterface::Role::kReceive));
00043 auto source_rank = transfer->source_rank();
00044 if (enabled_srcs_empty) enabled_sources_.insert(source_rank);
00045 source_plugins_[source_rank] = std::move(transfer);
00046 fragment_store_[source_rank];
00047 }
00048 catch (cet::exception ex)
00049 {
00050 TLOG_WARNING("FragmentReceiverManager") << "cet::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
00051 }
00052 catch (std::exception ex)
00053 {
00054 TLOG_WARNING("FragmentReceiverManager") << "std::exception caught while setting up source " << s << ": " << ex.what() << TLOG_ENDL;
00055 }
00056 catch (...)
00057 {
00058 TLOG_WARNING("FragmentReceiverManager") << "Non-cet exception caught while setting up source " << s << "." << TLOG_ENDL;
00059 }
00060 }
00061 if (srcs.get_pset_names().size() == 0)
00062 {
00063 TLOG_ERROR("FragmentReceiverManager") << "No sources configured!" << TLOG_ENDL;
00064 }
00065 }
00066
00067 artdaq::FragmentReceiverManager::~FragmentReceiverManager()
00068 {
00069 TLOG_DEBUG("FragmentReceiverManager") << "Destructor" << TLOG_ENDL;
00070 TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: BEGIN: Setting stop_requested to true, frags=" << std::to_string(count()) << ", bytes=" << std::to_string(byteCount()) << TLOG_ENDL;
00071 stop_requested_ = true;
00072
00073 TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: Notifying all threads" << TLOG_ENDL;
00074 output_cv_.notify_all();
00075
00076 TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: Joining all threads" << TLOG_ENDL;
00077 for (auto& s : source_threads_)
00078 {
00079 auto& thread = s.second;
00080 if (thread.joinable()) thread.join();
00081 }
00082 TLOG_ARB(5, "FragmentReceiverManager") << "~FragmentReceiverManager: DONE" << TLOG_ENDL;
00083 }
00084
00085 bool artdaq::FragmentReceiverManager::fragments_ready_() const
00086 {
00087 return get_next_source_() != -1;
00088 }
00089
00090 int artdaq::FragmentReceiverManager::get_next_source_() const
00091 {
00092
00093 for (auto& it : fragment_store_)
00094 {
00095 if (!enabled_sources_.count(it.first)) continue;
00096 if (!it.second.empty()) return it.first;
00097 }
00098 return -1;
00099 }
00100
00101 void artdaq::FragmentReceiverManager::start_threads()
00102 {
00103 for (auto& source : source_plugins_)
00104 {
00105 auto& rank = source.first;
00106 if (enabled_sources_.count(rank))
00107 {
00108 source_threads_[rank] = boost::thread(&FragmentReceiverManager::runReceiver_, this, rank);
00109 }
00110 }
00111 }
00112
00113 artdaq::FragmentPtr artdaq::FragmentReceiverManager::recvFragment(int& rank, size_t timeout_usec)
00114 {
00115 TLOG_ARB(5, "FragmentReceiverManager") <<"recvFragment entered tmo=" << std::to_string(timeout_usec) << " us" << TLOG_ENDL;
00116
00117 if (timeout_usec == 0) timeout_usec = 1000000;
00118
00119 auto ready = fragments_ready_();
00120 size_t waited = 0;
00121 auto wait_amount = timeout_usec / 1000 > 1000 ? timeout_usec / 1000 : 1000;
00122 TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment fragment_ready_=" << ready << " before wait" << TLOG_ENDL;
00123 while (!ready && waited < timeout_usec)
00124 {
00125 {
00126 std::unique_lock<std::mutex> lck(input_cv_mutex_);
00127 input_cv_.wait_for(lck, std::chrono::microseconds(wait_amount));
00128 }
00129 waited += wait_amount;
00130 ready = fragments_ready_();
00131 }
00132 TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment fragment_ready_=" << ready << " after waited=" << std::to_string( waited) << TLOG_ENDL;
00133 if (!ready)
00134 {
00135 TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment: No fragments ready, returning empty" << TLOG_ENDL;
00136 rank = TransferInterface::RECV_TIMEOUT;
00137 return std::unique_ptr<Fragment>{};
00138 }
00139
00140 int current_source = get_next_source_();
00141 FragmentPtr current_fragment = fragment_store_[current_source].front();
00142 output_cv_.notify_all();
00143 rank = current_source;
00144
00145 if (current_fragment != nullptr)
00146 TLOG_ARB(5, "FragmentReceiverManager") << "recvFragment: Done rank="<< rank <<", fragment size="<<std::to_string(current_fragment->size()) << " words, seqId=" << std::to_string( current_fragment->sequenceID()) << TLOG_ENDL;
00147 return std::move(current_fragment);
00148 }
00149
00150 void artdaq::FragmentReceiverManager::runReceiver_(int source_rank)
00151 {
00152 while (!stop_requested_ && enabled_sources_.count(source_rank))
00153 {
00154 TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_ "<< source_rank << ": Begin loop" << TLOG_ENDL;
00155 auto is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
00156 while (!stop_requested_ && is_suppressed)
00157 {
00158 TLOG_ARB(6, "FragmentReceiverManager") << "runReceiver_: Suppressing receiver rank " << source_rank << TLOG_ENDL;
00159 if (!is_suppressed) input_cv_.notify_all();
00160 else
00161 {
00162 std::unique_lock<std::mutex> lck(output_cv_mutex_);
00163 output_cv_.wait_for(lck, std::chrono::seconds(1));
00164 }
00165 is_suppressed = suppress_noisy_senders_ && recv_seq_count_.slotCount(source_rank) > suppression_threshold_ + recv_seq_count_.minCount();
00166 }
00167 if (stop_requested_) return;
00168
00169 auto start_time = std::chrono::steady_clock::now();
00170 TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_: Calling receiveFragment" << TLOG_ENDL;
00171 auto fragment = std::unique_ptr<Fragment>(new Fragment());
00172 #if 0
00173 auto ret = source_plugins_[source_rank]->receiveFragment(*fragment, receive_timeout_);
00174 TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_: Done with receiveFragment, ret=" << ret << " (should be " << source_rank << ")" << TLOG_ENDL;
00175 if (ret != source_rank) continue;
00176 #else
00177 artdaq::detail::RawFragmentHeader hdr;
00178 auto ret1 = source_plugins_[source_rank]->receiveFragmentHeader(hdr, receive_timeout_);
00179 TLOG_ARB(16, "FragmentReceiverManager") << "runReceiver_: Done with receiveFragmentHeader, ret1=" << ret1 << " (should be " << source_rank << ")" << TLOG_ENDL;
00180
00181 if (ret1 != source_rank) continue;
00182
00183 fragment->resize(hdr.word_count - hdr.num_words());
00184 memcpy(fragment->headerAddress(), &hdr, hdr.num_words() * sizeof(artdaq::RawDataType));
00185 auto ret2 = source_plugins_[source_rank]->receiveFragmentData(fragment->headerAddress() + hdr.num_words(), hdr.word_count - hdr.num_words());
00186 if (ret2 != ret1)
00187 {
00188 TLOG_ERROR("FragmentReceiverManager") << "ReceiveFragmentHeader returned " << ret1 << ", but ReceiveFragmentData returned " << ret2 << TLOG_ENDL;
00189 continue;
00190 }
00191 #endif
00192
00193
00194 if (fragment->type() == artdaq::Fragment::EndOfDataFragmentType)
00195 {
00196 fragment_store_[source_rank].SetEndOfData(*reinterpret_cast<size_t*>(fragment->dataBegin()));
00197 }
00198 else if(fragment->type() == artdaq::Fragment::DataFragmentType || fragment->type() == artdaq::Fragment::ContainerFragmentType || fragment->isUserFragmentType(fragment->type()))
00199 {
00200 recv_frag_count_.incSlot(source_rank);
00201 recv_frag_size_.incSlot(source_rank, fragment->size() * sizeof(RawDataType));
00202 recv_seq_count_.setSlot(source_rank, fragment->sequenceID());
00203 }
00204 else
00205 {
00206 continue;
00207 }
00208
00209
00210
00211 if (metricMan)
00212 {
00213 TLOG_ARB(6, "FragmentReceiverManager") << "runReceiver_: Sending receive stats" << TLOG_ENDL;
00214 auto delta_t = std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1>>>(std::chrono::steady_clock::now() - start_time).count();
00215 metricMan->sendMetric("Data Receive Time From Rank " + std::to_string(source_rank), delta_t, "s", 1, MetricMode::Accumulate);
00216 metricMan->sendMetric("Data Receive Size From Rank " + std::to_string(source_rank), static_cast<unsigned long>(fragment->size() * sizeof(RawDataType)), "B", 1, MetricMode::Accumulate);
00217 metricMan->sendMetric("Data Receive Rate From Rank " + std::to_string(source_rank), fragment->size() * sizeof(RawDataType) / delta_t, "B/s", 1, MetricMode::Average);
00218 }
00219
00220
00221 fragment_store_[source_rank].emplace_back(std::move(fragment));
00222 input_cv_.notify_all();
00223
00224 if (fragment_store_[source_rank].GetEndOfData() <= recv_frag_count_.slotCount(source_rank))
00225 {
00226 return;
00227 }
00228 }
00229 }