$treeview $search $mathjax $extrastylesheet
artdaq_mpich_plugin
v1_00_06a
$projectbrief
|
$projectbrief
|
$searchbox |
00001 #define TRACE_NAME "builder" 00002 00003 #include "art/Framework/Art/artapp.h" 00004 #include "artdaq-core/Generators/FragmentGenerator.hh" 00005 #include "artdaq-core/Data/Fragment.hh" 00006 #include "artdaq-core/Generators/makeFragmentGenerator.hh" 00007 #include "MPIProg.hh" 00008 #include "artdaq/DAQrate/DataSenderManager.hh" 00009 #include "artdaq/DAQrate/DataReceiverManager.hh" 00010 #include "artdaq-core/Core/SimpleMemoryReader.hh" 00011 #include "artdaq-mpich-plugin/Utilities/quiet_mpi.hh" 00012 00013 #include <boost/program_options.hpp> 00014 #include "fhiclcpp/make_ParameterSet.h" 00015 namespace bpo = boost::program_options; 00016 00017 #include <algorithm> 00018 #include <cmath> 00019 #include <cstdlib> 00020 00021 extern "C" 00022 { 00023 #include <unistd.h> 00024 } 00025 00026 #include <iostream> 00027 #include <memory> 00028 #include <utility> 00029 00030 extern "C" 00031 { 00032 #include <sys/time.h> 00033 #include <sys/resource.h> 00034 } 00035 00039 class Builder : public MPIProg 00040 { 00041 public: 00049 Builder(int argc, char* argv[], fhicl::ParameterSet pset, int key); 00050 00054 void go(); 00055 00059 void sink(); 00060 00064 void detector(); 00065 00066 private: 00067 enum class Role : int 00068 { 00069 DETECTOR, 00070 SINK 00071 }; 00072 00073 void printHost(const std::string& functionName) const; 00074 00075 fhicl::ParameterSet daq_pset_; 00076 bool const want_sink_; 00077 bool const want_periodic_sync_; 00078 MPI_Comm local_group_comm_; 00079 Role builder_role_; 00080 }; 00081 00082 Builder::Builder(int argc, char* argv[], fhicl::ParameterSet pset, int key) : 00083 MPIProg(argc, argv) 00084 , daq_pset_(pset) 00085 , want_sink_(daq_pset_.get<bool>("want_sink", true)) 00086 , want_periodic_sync_(daq_pset_.get<bool>("want_periodic_sync", false)) 00087 , local_group_comm_() 00088 { 00089 std::vector<std::string> detectors; 00090 daq_pset_.get_if_present("detectors", detectors); 00091 if (static_cast<size_t>(my_rank) >= detectors.size()) 00092 { 00093 builder_role_ = Role::SINK; 00094 } 00095 else 00096 { 00097 builder_role_ = Role::DETECTOR; 00098 } 00099 std::string type(pset.get<std::string>("transfer_plugin_type", "Shmem")); 00100 00101 int senders = pset.get<int>("num_senders"); 00102 int receivers = pset.get<int>("num_receivers"); 00103 int buffer_count = pset.get<int>("buffer_count", 10); 00104 int max_payload_size = pset.get<size_t>("fragment_size", 0x100000); 00105 00106 std::string hostmap = ""; 00107 if (pset.has_key("hostmap")) 00108 { 00109 hostmap = " host_map: @local::hostmap"; 00110 } 00111 00112 std::stringstream ss; 00113 ss << pset.to_string(); 00114 ss << " sources: {"; 00115 for (int ii = 0; ii < senders; ++ii) 00116 { 00117 ss << "s" << ii << ": { transferPluginType: " << type << " source_rank: " << ii << " max_fragment_size_words: " << max_payload_size << " buffer_count: " << buffer_count << " shm_key_offset: " << std::to_string(key) << hostmap << "}"; 00118 } 00119 ss << "} destinations: {"; 00120 for (int jj = senders; jj < senders + receivers; ++jj) 00121 { 00122 ss << "d" << jj << ": { transferPluginType: " << type << " destination_rank: " << jj << " max_fragment_size_words: " << max_payload_size << " buffer_count: " << buffer_count << " shm_key_offset: " << std::to_string(key) << hostmap << "}"; 00123 } 00124 ss << "}"; 00125 00126 daq_pset_ = fhicl::ParameterSet(); 00127 make_ParameterSet(ss.str(), daq_pset_); 00128 00129 00130 } 00131 00132 void Builder::go() 00133 { 00134 //volatile bool loopForever = true; 00135 //while(loopForever) 00136 //{ 00137 // usleep(1000000); 00138 //} 00139 00140 00141 MPI_Barrier(MPI_COMM_WORLD); 00142 //std::cout << "daq_pset_: " << daq_pset_.to_string() << std::endl << "conf_.makeParameterSet(): " << conf_.makeParameterSet().to_string() << std::endl; 00143 MPI_Comm_split(MPI_COMM_WORLD, static_cast<int>(builder_role_), 0, &local_group_comm_); 00144 switch (builder_role_) 00145 { 00146 case Role::SINK: 00147 if (want_sink_) 00148 { 00149 sink(); 00150 } 00151 else 00152 { 00153 std::string 00154 msg("WARNING: a sink was instantiated despite want_sink being false:\n" 00155 "set nsinks to 0 in invocation of daqrate?\n"); 00156 std::cerr << msg; 00157 MPI_Barrier(MPI_COMM_WORLD); 00158 } 00159 break; 00160 case Role::DETECTOR: 00161 detector(); 00162 break; 00163 default: 00164 throw "No such node type"; 00165 } 00166 } 00167 00168 void Builder::detector() 00169 { 00170 printHost("detector"); 00171 int detector_rank; 00172 // Should be zero-based, detectors only. 00173 MPI_Comm_rank(local_group_comm_, &detector_rank); 00174 assert(!(detector_rank < 0)); 00175 std::ostringstream det_ps_name_loc; 00176 std::vector<std::string> detectors; 00177 bool detectors_present = daq_pset_.get_if_present("detectors", detectors); 00178 size_t detectors_size = detectors.size(); 00179 if (!(detectors_present && detectors_size)) 00180 { 00181 throw cet::exception("Configuration") 00182 << "Unable to find required sequence of detector " 00183 << "parameter set names, \"detectors\"."; 00184 } 00185 fhicl::ParameterSet det_ps = 00186 daq_pset_.get<fhicl::ParameterSet>(((detectors_size > static_cast<size_t>(detector_rank)) ? detectors[detector_rank] : detectors[0])); 00187 std::unique_ptr<artdaq::FragmentGenerator> const 00188 gen(artdaq::makeFragmentGenerator 00189 (det_ps.get<std::string>("generator"), 00190 det_ps)); 00191 { // Block to handle lifetime of h, below. 00192 artdaq::DataSenderManager h(daq_pset_); 00193 MPI_Barrier(local_group_comm_); 00194 // not using the run time method 00195 // TimedLoop tl(conf_.run_time_); 00196 size_t fragments_per_source = -1; 00197 daq_pset_.get_if_present("fragments_per_source", fragments_per_source); 00198 artdaq::FragmentPtrs frags; 00199 size_t fragments_sent = 0; 00200 while (fragments_sent < fragments_per_source && gen->getNext(frags)) 00201 { 00202 if (!fragments_sent) 00203 { 00204 // Get the detectors lined up first time before we start the 00205 // firehoses. 00206 MPI_Barrier(local_group_comm_); 00207 } 00208 for (auto& fragPtr : frags) 00209 { 00210 std::cout << "Program::detector: Sending fragment " << fragments_sent + 1 << " of " << fragments_per_source << std::endl; 00211 TLOG(TLVL_DEBUG) << "Program::detector: Sending fragment " << fragments_sent + 1 << " of " << fragments_per_source ; 00212 h.sendFragment(std::move(*fragPtr)); 00213 if (++fragments_sent == fragments_per_source) { break; } 00214 if (want_periodic_sync_ && (fragments_sent % 100) == 0) 00215 { 00216 // Don't get too far out of sync. 00217 MPI_Barrier(local_group_comm_); 00218 } 00219 } 00220 frags.clear(); 00221 } 00222 TLOG(TLVL_DEBUG) << "detector waiting " << my_rank ; 00223 } 00224 TLOG(TLVL_DEBUG) << "detector done " << my_rank ; 00225 MPI_Comm_free(&local_group_comm_); 00226 MPI_Barrier(MPI_COMM_WORLD); 00227 } 00228 00229 void Builder::sink() 00230 { 00231 printHost("sink"); 00232 { 00233 usleep(1000 * my_rank); 00234 // This scope exists to control the lifetime of 'events' 00235 auto events = std::make_shared<artdaq::SharedMemoryEventManager>(daq_pset_, daq_pset_); 00236 events->startRun(daq_pset_.get<int>("run_number", 100)); 00237 { // Block to handle scope of h, below. 00238 artdaq::DataReceiverManager h(daq_pset_, events); 00239 h.start_threads(); 00240 while (h.running_sources().size() > 0) 00241 { 00242 usleep(10000); 00243 } 00244 } 00245 00246 TLOG(TLVL_DEBUG) << "All detectors are done, Sending endOfData Fragment" ; 00247 // Make the reader application finish, and capture its return 00248 // status. 00249 bool endSucceeded = false; 00250 endSucceeded = events->endOfData(); 00251 if (endSucceeded) 00252 { 00253 TLOG(TLVL_DEBUG) << "Sink: reader is done" ; 00254 } 00255 else 00256 { 00257 TLOG(TLVL_DEBUG) << "Sink: reader failed to complete because the " 00258 << "endOfData marker could not be pushed onto the queue." 00259 ; 00260 } 00261 } // end of lifetime of 'events' 00262 TLOG(TLVL_DEBUG) << "Sink done " << my_rank ; 00263 MPI_Barrier(MPI_COMM_WORLD); 00264 } 00265 00266 void Builder::printHost(const std::string& functionName) const 00267 { 00268 char* doPrint = getenv("PRINT_HOST"); 00269 if (doPrint == 0) { return; } 00270 const int ARRSIZE = 80; 00271 char hostname[ARRSIZE]; 00272 std::string hostString; 00273 if (!gethostname(hostname, ARRSIZE)) 00274 { 00275 hostString = hostname; 00276 } 00277 else 00278 { 00279 hostString = "unknown"; 00280 } 00281 TLOG(TLVL_DEBUG) << "Running " << functionName 00282 << " on host " << hostString 00283 << " with rank " << my_rank << "." 00284 ; 00285 } 00286 00287 void printUsage() 00288 { 00289 int myid = 0; 00290 struct rusage usage; 00291 getrusage(RUSAGE_SELF, &usage); 00292 std::cout << myid << ":" 00293 << " user=" << artdaq::TimeUtils::convertUnixTimeToSeconds(usage.ru_utime) 00294 << " sys=" << artdaq::TimeUtils::convertUnixTimeToSeconds(usage.ru_stime) 00295 << std::endl; 00296 } 00297 00298 int main(int argc, char* argv[]) 00299 { 00300 artdaq::configureMessageFacility("builder"); 00301 00302 std::ostringstream descstr; 00303 descstr << argv[0] 00304 << " <-c <config-file>> <other-options> [<source-file>]+"; 00305 bpo::options_description desc(descstr.str()); 00306 desc.add_options() 00307 ("config,c", bpo::value<std::string>(), "Configuration file.") 00308 ("key,k", bpo::value<int>(), "Shared Memory Key") 00309 ("help,h", "produce help message"); 00310 bpo::variables_map vm; 00311 try { 00312 bpo::store(bpo::command_line_parser(argc, argv).options(desc).run(), vm); 00313 bpo::notify(vm); 00314 } 00315 catch (bpo::error const & e) { 00316 std::cerr << "Exception from command line processing in " << argv[0] 00317 << ": " << e.what() << "\n"; 00318 return -1; 00319 } 00320 if (vm.count("help")) { 00321 std::cout << desc << std::endl; 00322 return 1; 00323 } 00324 if (!vm.count("config")) { 00325 std::cerr << "Exception from command line processing in " << argv[0] 00326 << ": no configuration file given.\n" 00327 << "For usage and an options list, please do '" 00328 << argv[0] << " --help" 00329 << "'.\n"; 00330 return 2; 00331 } 00332 int key = 0; 00333 if (vm.count("key")) 00334 { 00335 key = vm["key"].as<int>(); 00336 } 00337 fhicl::ParameterSet pset; 00338 if (getenv("FHICL_FILE_PATH") == nullptr) { 00339 std::cerr 00340 << "INFO: environment variable FHICL_FILE_PATH was not set. Using \".\"\n"; 00341 setenv("FHICL_FILE_PATH", ".", 0); 00342 } 00343 cet::filepath_lookup_after1 lookup_policy("FHICL_FILE_PATH"); 00344 fhicl::make_ParameterSet(vm["config"].as<std::string>(), lookup_policy, pset); 00345 00346 int rc = 1; 00347 try 00348 { 00349 Builder p(argc, argv, pset,key); 00350 std::cerr << "Started process " << my_rank << " of " << p.procs_ << ".\n"; 00351 p.go(); 00352 rc = 0; 00353 } 00354 catch (std::string& x) 00355 { 00356 std::cerr << "Exception (type string) caught in driver: " 00357 << x 00358 << '\n'; 00359 return 1; 00360 } 00361 catch (char const* m) 00362 { 00363 std::cerr << "Exception (type char const*) caught in driver: "; 00364 if (m) 00365 { 00366 std::cerr << m; 00367 } 00368 else 00369 { 00370 std::cerr << "[the value was a null pointer, so no message is available]"; 00371 } 00372 std::cerr << '\n'; 00373 } 00374 return rc; 00375 }