00001 #include "art/Framework/Art/artapp.h"
00002 #include "artdaq-core/Generators/FragmentGenerator.hh"
00003 #include "artdaq-core/Data/Fragment.hh"
00004 #include "artdaq-core/Generators/makeFragmentGenerator.hh"
00005 #include "MPIProg.hh"
00006 #include "artdaq/DAQrate/DataSenderManager.hh"
00007 #include "artdaq/DAQrate/DataReceiverManager.hh"
00008 #include "artdaq-core/Core/SimpleMemoryReader.hh"
00009 #include "artdaq/DAQrate/quiet_mpi.hh"
00010
00011 #include <boost/program_options.hpp>
00012 #include "fhiclcpp/make_ParameterSet.h"
00013 namespace bpo = boost::program_options;
00014
00015 #include <algorithm>
00016 #include <cmath>
00017 #include <cstdlib>
00018
00019 extern "C"
00020 {
00021 #include <unistd.h>
00022 }
00023
00024 #include <iostream>
00025 #include <memory>
00026 #include <utility>
00027
00028 extern "C"
00029 {
00030 #include <sys/time.h>
00031 #include <sys/resource.h>
00032 }
00033
00037 class Builder : public MPIProg
00038 {
00039 public:
00047 Builder(int argc, char* argv[], fhicl::ParameterSet pset, int key);
00048
00052 void go();
00053
00057 void sink();
00058
00062 void detector();
00063
00064 private:
00065 enum class Role : int
00066 {
00067 DETECTOR,
00068 SINK
00069 };
00070
00071 void printHost(const std::string& functionName) const;
00072
00073 fhicl::ParameterSet daq_pset_;
00074 bool const want_sink_;
00075 bool const want_periodic_sync_;
00076 MPI_Comm local_group_comm_;
00077 Role builder_role_;
00078 };
00079
00080 Builder::Builder(int argc, char* argv[], fhicl::ParameterSet pset, int key) :
00081 MPIProg(argc, argv)
00082 , daq_pset_(pset)
00083 , want_sink_(daq_pset_.get<bool>("want_sink", true))
00084 , want_periodic_sync_(daq_pset_.get<bool>("want_periodic_sync", false))
00085 , local_group_comm_()
00086 {
00087 std::vector<std::string> detectors;
00088 daq_pset_.get_if_present("detectors", detectors);
00089 if (static_cast<size_t>(my_rank) >= detectors.size())
00090 {
00091 builder_role_ = Role::SINK;
00092 }
00093 else
00094 {
00095 builder_role_ = Role::DETECTOR;
00096 }
00097 std::string type(pset.get<std::string>("transfer_plugin_type", "Shmem"));
00098
00099 int senders = pset.get<int>("num_senders");
00100 int receivers = pset.get<int>("num_receivers");
00101 int buffer_count = pset.get<int>("buffer_count", 10);
00102 int max_payload_size = pset.get<size_t>("fragment_size", 0x100000);
00103
00104 std::string hostmap = "";
00105 if (pset.has_key("hostmap"))
00106 {
00107 hostmap = " host_map: @local::hostmap";
00108 }
00109
00110 std::stringstream ss;
00111 ss << pset.to_string();
00112 ss << " sources: {";
00113 for (int ii = 0; ii < senders; ++ii)
00114 {
00115 ss << "s" << ii << ": { transferPluginType: " << type << " source_rank: " << ii << " max_fragment_size_words: " << max_payload_size << " buffer_count: " << buffer_count << " shm_key_offset: " << std::to_string(key) << hostmap << "}";
00116 }
00117 ss << "} destinations: {";
00118 for (int jj = senders; jj < senders + receivers; ++jj)
00119 {
00120 ss << "d" << jj << ": { transferPluginType: " << type << " destination_rank: " << jj << " max_fragment_size_words: " << max_payload_size << " buffer_count: " << buffer_count << " shm_key_offset: " << std::to_string(key) << hostmap << "}";
00121 }
00122 ss << "}";
00123
00124 daq_pset_ = fhicl::ParameterSet();
00125 make_ParameterSet(ss.str(), daq_pset_);
00126
00127
00128 }
00129
00130 void Builder::go()
00131 {
00132
00133
00134
00135
00136
00137
00138
00139 MPI_Barrier(MPI_COMM_WORLD);
00140
00141 MPI_Comm_split(MPI_COMM_WORLD, static_cast<int>(builder_role_), 0, &local_group_comm_);
00142 switch (builder_role_)
00143 {
00144 case Role::SINK:
00145 if (want_sink_)
00146 {
00147 sink();
00148 }
00149 else
00150 {
00151 std::string
00152 msg("WARNING: a sink was instantiated despite want_sink being false:\n"
00153 "set nsinks to 0 in invocation of daqrate?\n");
00154 std::cerr << msg;
00155 MPI_Barrier(MPI_COMM_WORLD);
00156 }
00157 break;
00158 case Role::DETECTOR:
00159 detector();
00160 break;
00161 default:
00162 throw "No such node type";
00163 }
00164 }
00165
00166 void Builder::detector()
00167 {
00168 printHost("detector");
00169 int detector_rank;
00170
00171 MPI_Comm_rank(local_group_comm_, &detector_rank);
00172 assert(!(detector_rank < 0));
00173 std::ostringstream det_ps_name_loc;
00174 std::vector<std::string> detectors;
00175 bool detectors_present = daq_pset_.get_if_present("detectors", detectors);
00176 size_t detectors_size = detectors.size();
00177 if (!(detectors_present && detectors_size))
00178 {
00179 throw cet::exception("Configuration")
00180 << "Unable to find required sequence of detector "
00181 << "parameter set names, \"detectors\".";
00182 }
00183 fhicl::ParameterSet det_ps =
00184 daq_pset_.get<fhicl::ParameterSet>(((detectors_size > static_cast<size_t>(detector_rank)) ? detectors[detector_rank] : detectors[0]));
00185 std::unique_ptr<artdaq::FragmentGenerator> const
00186 gen(artdaq::makeFragmentGenerator
00187 (det_ps.get<std::string>("generator"),
00188 det_ps));
00189 {
00190 artdaq::DataSenderManager h(daq_pset_);
00191 MPI_Barrier(local_group_comm_);
00192
00193
00194 size_t fragments_per_source = -1;
00195 daq_pset_.get_if_present("fragments_per_source", fragments_per_source);
00196 artdaq::FragmentPtrs frags;
00197 size_t fragments_sent = 0;
00198 while (fragments_sent < fragments_per_source && gen->getNext(frags))
00199 {
00200 if (!fragments_sent)
00201 {
00202
00203
00204 MPI_Barrier(local_group_comm_);
00205 }
00206 for (auto& fragPtr : frags)
00207 {
00208 std::cout << "Program::detector: Sending fragment " << fragments_sent + 1 << " of " << fragments_per_source << std::endl;
00209 TLOG_DEBUG("builder") << "Program::detector: Sending fragment " << fragments_sent + 1 << " of " << fragments_per_source << TLOG_ENDL;
00210 h.sendFragment(std::move(*fragPtr));
00211 if (++fragments_sent == fragments_per_source) { break; }
00212 if (want_periodic_sync_ && (fragments_sent % 100) == 0)
00213 {
00214
00215 MPI_Barrier(local_group_comm_);
00216 }
00217 }
00218 frags.clear();
00219 }
00220 TLOG_DEBUG("builder") << "detector waiting " << my_rank << TLOG_ENDL;
00221 }
00222 TLOG_DEBUG("builder") << "detector done " << my_rank << TLOG_ENDL;
00223 MPI_Comm_free(&local_group_comm_);
00224 MPI_Barrier(MPI_COMM_WORLD);
00225 }
00226
00227 void Builder::sink()
00228 {
00229 printHost("sink");
00230 {
00231 usleep(1000 * my_rank);
00232
00233 auto events = std::make_shared<artdaq::SharedMemoryEventManager>(daq_pset_, daq_pset_);
00234 events->startRun(daq_pset_.get<int>("run_number", 100));
00235 {
00236 artdaq::DataReceiverManager h(daq_pset_, events);
00237 h.start_threads();
00238 while (h.running_sources().size() > 0)
00239 {
00240 usleep(10000);
00241 }
00242 }
00243
00244 TLOG_DEBUG("builder") << "All detectors are done, Sending endOfData Fragment" << TLOG_ENDL;
00245
00246
00247 bool endSucceeded = false;
00248 endSucceeded = events->endOfData();
00249 if (endSucceeded)
00250 {
00251 TLOG_DEBUG("builder") << "Sink: reader is done" << TLOG_ENDL;
00252 }
00253 else
00254 {
00255 TLOG_DEBUG("builder") << "Sink: reader failed to complete because the "
00256 << "endOfData marker could not be pushed onto the queue."
00257 << TLOG_ENDL;
00258 }
00259 }
00260 TLOG_DEBUG("builder") << "Sink done " << my_rank << TLOG_ENDL;
00261 MPI_Barrier(MPI_COMM_WORLD);
00262 }
00263
00264 void Builder::printHost(const std::string& functionName) const
00265 {
00266 char* doPrint = getenv("PRINT_HOST");
00267 if (doPrint == 0) { return; }
00268 const int ARRSIZE = 80;
00269 char hostname[ARRSIZE];
00270 std::string hostString;
00271 if (!gethostname(hostname, ARRSIZE))
00272 {
00273 hostString = hostname;
00274 }
00275 else
00276 {
00277 hostString = "unknown";
00278 }
00279 TLOG_DEBUG("builder") << "Running " << functionName
00280 << " on host " << hostString
00281 << " with rank " << my_rank << "."
00282 << TLOG_ENDL;
00283 }
00284
00285 void printUsage()
00286 {
00287 int myid = 0;
00288 struct rusage usage;
00289 getrusage(RUSAGE_SELF, &usage);
00290 std::cout << myid << ":"
00291 << " user=" << artdaq::TimeUtils::convertUnixTimeToSeconds(usage.ru_utime)
00292 << " sys=" << artdaq::TimeUtils::convertUnixTimeToSeconds(usage.ru_stime)
00293 << std::endl;
00294 }
00295
00296 int main(int argc, char* argv[])
00297 {
00298 artdaq::configureMessageFacility("builder");
00299
00300 std::ostringstream descstr;
00301 descstr << argv[0]
00302 << " <-c <config-file>> <other-options> [<source-file>]+";
00303 bpo::options_description desc(descstr.str());
00304 desc.add_options()
00305 ("config,c", bpo::value<std::string>(), "Configuration file.")
00306 ("key,k", bpo::value<int>(), "Shared Memory Key")
00307 ("help,h", "produce help message");
00308 bpo::variables_map vm;
00309 try {
00310 bpo::store(bpo::command_line_parser(argc, argv).options(desc).run(), vm);
00311 bpo::notify(vm);
00312 }
00313 catch (bpo::error const & e) {
00314 std::cerr << "Exception from command line processing in " << argv[0]
00315 << ": " << e.what() << "\n";
00316 return -1;
00317 }
00318 if (vm.count("help")) {
00319 std::cout << desc << std::endl;
00320 return 1;
00321 }
00322 if (!vm.count("config")) {
00323 std::cerr << "Exception from command line processing in " << argv[0]
00324 << ": no configuration file given.\n"
00325 << "For usage and an options list, please do '"
00326 << argv[0] << " --help"
00327 << "'.\n";
00328 return 2;
00329 }
00330 int key = 0;
00331 if (vm.count("key"))
00332 {
00333 key = vm["key"].as<int>();
00334 }
00335 fhicl::ParameterSet pset;
00336 if (getenv("FHICL_FILE_PATH") == nullptr) {
00337 std::cerr
00338 << "INFO: environment variable FHICL_FILE_PATH was not set. Using \".\"\n";
00339 setenv("FHICL_FILE_PATH", ".", 0);
00340 }
00341 cet::filepath_lookup_after1 lookup_policy("FHICL_FILE_PATH");
00342 fhicl::make_ParameterSet(vm["config"].as<std::string>(), lookup_policy, pset);
00343
00344 int rc = 1;
00345 try
00346 {
00347 Builder p(argc, argv, pset,key);
00348 std::cerr << "Started process " << my_rank << " of " << p.procs_ << ".\n";
00349 p.go();
00350 rc = 0;
00351 }
00352 catch (std::string& x)
00353 {
00354 std::cerr << "Exception (type string) caught in driver: "
00355 << x
00356 << '\n';
00357 return 1;
00358 }
00359 catch (char const* m)
00360 {
00361 std::cerr << "Exception (type char const*) caught in driver: ";
00362 if (m)
00363 {
00364 std::cerr << m;
00365 }
00366 else
00367 {
00368 std::cerr << "[the value was a null pointer, so no message is available]";
00369 }
00370 std::cerr << '\n';
00371 }
00372 return rc;
00373 }