00001 #define TRACE_NAME "TransferWrapper"
00002
00003 #include "artdaq/ArtModules/detail/TransferWrapper.hh"
00004 #include "artdaq/TransferPlugins/MakeTransferPlugin.hh"
00005 #include "artdaq/ExternalComms/MakeCommanderPlugin.hh"
00006 #include "artdaq/DAQdata/NetMonHeader.hh"
00007 #include "artdaq/DAQdata/Globals.hh"
00008 #include "artdaq-core/Utilities/ExceptionHandler.hh"
00009 #include "artdaq-core/Data/Fragment.hh"
00010
00011 #include "cetlib/BasicPluginFactory.h"
00012 #include "cetlib_except/exception.h"
00013 #include "fhiclcpp/ParameterSet.h"
00014
00015 #include <TBufferFile.h>
00016
00017 #include <limits>
00018 #include <iostream>
00019 #include <string>
00020 #include <sstream>
00021 #include <csignal>
00022
namespace
{
	/// Number of the most recently delivered signal; stays 0 until signal_handler runs.
	/// Declared volatile std::sig_atomic_t so it may be written from a signal handler.
	volatile std::sig_atomic_t gSignalStatus{0};
}

/// @brief Signal handler: stores the delivered signal number in gSignalStatus.
/// @param signum Number of the signal that was raised
void signal_handler(int signum)
{
	gSignalStatus = signum;
}
00036
00037 artdaq::TransferWrapper::TransferWrapper(const fhicl::ParameterSet& pset) :
00038 timeoutInUsecs_(pset.get<std::size_t>("timeoutInUsecs", 100000))
00039 , dispatcherHost_(pset.get<std::string>("dispatcherHost", "localhost"))
00040 , dispatcherPort_(pset.get<std::string>("dispatcherPort", "5266"))
00041 , serverUrl_(pset.get<std::string>("server_url", "http://" + dispatcherHost_ + ":" + dispatcherPort_ + "/RPC2"))
00042 , maxEventsBeforeInit_(pset.get<std::size_t>("maxEventsBeforeInit", 5))
00043 , allowedFragmentTypes_(pset.get<std::vector<int>>("allowedFragmentTypes", { 226, 227, 229 }))
00044 , quitOnFragmentIntegrityProblem_(pset.get<bool>("quitOnFragmentIntegrityProblem", true))
00045 , monitorRegistered_(false)
00046 {
00047 std::signal(SIGINT, signal_handler);
00048
00049 try
00050 {
00051 transfer_ = MakeTransferPlugin(pset, "transfer_plugin", TransferInterface::Role::kReceive);
00052 }
00053 catch (...)
00054 {
00055 ExceptionHandler(ExceptionHandlerRethrow::yes,
00056 "TransferWrapper: failure in call to MakeTransferPlugin");
00057 }
00058
00059 try {
00060 if (metricMan) {
00061 metricMan->initialize(pset.get<fhicl::ParameterSet>("metrics", fhicl::ParameterSet()), "Online Monitor");
00062 metricMan->do_start();
00063 }
00064 }
00065 catch (...)
00066 {
00067 ExceptionHandler(ExceptionHandlerRethrow::no, "TransferWrapper: could not configure metrics");
00068 }
00069
00070
00071 fhicl::ParameterSet new_pset(pset);
00072 if (!new_pset.has_key("server_url")) {
00073 new_pset.put<std::string>("server_url", serverUrl_);
00074 }
00075
00076 auto dispatcherConfig = pset.get<fhicl::ParameterSet>("dispatcher_config");
00077 artdaq::Commandable c;
00078 commander_ = MakeCommanderPlugin(new_pset, c);
00079
00080 int retry = 3;
00081
00082 while (retry > 0) {
00083 TLOG(TLVL_INFO) << "Attempting to register this monitor (\"" << transfer_->uniqueLabel()
00084 << "\") with the dispatcher aggregator" ;
00085
00086 auto status = commander_->send_register_monitor(dispatcherConfig.to_string());
00087
00088 TLOG(TLVL_INFO) << "Response from dispatcher is \"" << status << "\"" ;
00089
00090 if (status == "Success")
00091 {
00092 monitorRegistered_ = true;
00093 break;
00094 }
00095 else
00096 {
00097 TLOG(TLVL_WARNING) << "Error in TransferWrapper: attempt to register with dispatcher did not result in the \"Success\" response" ;
00098 usleep(100000);
00099 }
00100 retry--;
00101 }
00102 }
00103
00104 void artdaq::TransferWrapper::receiveMessage(std::unique_ptr<TBufferFile>& msg)
00105 {
00106 std::unique_ptr<artdaq::Fragment> fragmentPtr;
00107 bool receivedFragment = false;
00108 static bool initialized = false;
00109 static size_t fragments_received = 0;
00110
00111 while (true && !gSignalStatus)
00112 {
00113 fragmentPtr = std::make_unique<artdaq::Fragment>();
00114
00115 while (!receivedFragment)
00116 {
00117 if (gSignalStatus)
00118 {
00119 TLOG(TLVL_INFO) << "Ctrl-C appears to have been hit" ;
00120 unregisterMonitor();
00121 return;
00122 }
00123
00124 try
00125 {
00126 auto result = transfer_->receiveFragment(*fragmentPtr, timeoutInUsecs_);
00127
00128 if (result >= artdaq::TransferInterface::RECV_SUCCESS)
00129 {
00130 receivedFragment = true;
00131 fragments_received++;
00132
00133 static size_t cntr = 1;
00134 auto mod = ++cntr % 10;
00135 auto suffix = "-th";
00136 if (mod == 1) suffix = "-st";
00137 if (mod == 2) suffix = "-nd";
00138 if (mod == 3) suffix = "-rd";
00139 TLOG(TLVL_INFO) << "Received " << cntr << suffix << " event, "
00140 << "seqID == " << fragmentPtr->sequenceID()
00141 << ", type == " << fragmentPtr->typeString() ;
00142 continue;
00143 }
00144 else if (result == artdaq::TransferInterface::DATA_END)
00145 {
00146 TLOG(TLVL_ERROR) << "Transfer Plugin disconnected or other unrecoverable error. Shutting down.";
00147 unregisterMonitor();
00148 return;
00149 }
00150 else
00151 {
00152
00153
00154 TLOG(TLVL_WARNING) << "Timeout occurred in call to transfer_->receiveFragmentFrom; will try again"
00155 << ", status = " << result;
00156
00157 }
00158 }
00159 catch (...)
00160 {
00161 ExceptionHandler(ExceptionHandlerRethrow::yes,
00162 "Problem receiving data in TransferWrapper::receiveMessage");
00163 }
00164 }
00165
00166 if (fragmentPtr->type() == artdaq::Fragment::EndOfDataFragmentType)
00167 {
00168
00169
00170
00171
00172 return;
00173 }
00174
00175 try
00176 {
00177 extractTBufferFile(*fragmentPtr, msg);
00178 }
00179 catch (...)
00180 {
00181 ExceptionHandler(ExceptionHandlerRethrow::yes,
00182 "Problem extracting TBufferFile from artdaq::Fragment in TransferWrapper::receiveMessage");
00183 }
00184
00185 checkIntegrity(*fragmentPtr);
00186
00187 if (initialized || fragmentPtr->type() == artdaq::Fragment::InitFragmentType)
00188 {
00189 initialized = true;
00190 break;
00191 }
00192 else
00193 {
00194 receivedFragment = false;
00195
00196 if (fragments_received > maxEventsBeforeInit_)
00197 {
00198 throw cet::exception("TransferWrapper") << "First " << maxEventsBeforeInit_ <<
00199 " events received did not include the \"Init\" event containing necessary info for art; exiting...";
00200 }
00201 }
00202 }
00203 }
00204
00205
00206 void
00207 artdaq::TransferWrapper::extractTBufferFile(const artdaq::Fragment& fragment,
00208 std::unique_ptr<TBufferFile>& tbuffer)
00209 {
00210 const artdaq::NetMonHeader* header = fragment.metadata<artdaq::NetMonHeader>();
00211 char* buffer = (char *)malloc(header->data_length);
00212 memcpy(buffer, fragment.dataBeginBytes(), header->data_length);
00213
00214
00215 tbuffer.reset(new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0));
00216 }
00217
00218 void
00219 artdaq::TransferWrapper::checkIntegrity(const artdaq::Fragment& fragment) const
00220 {
00221 const size_t artdaqheader = artdaq::detail::RawFragmentHeader::num_words() *
00222 sizeof(artdaq::detail::RawFragmentHeader::RawDataType);
00223 const size_t payload = static_cast<size_t>(fragment.dataEndBytes() - fragment.dataBeginBytes());
00224 const size_t metadata = sizeof(artdaq::NetMonHeader);
00225 const size_t totalsize = fragment.sizeBytes();
00226
00227 const size_t type = static_cast<size_t>(fragment.type());
00228
00229 if (totalsize != artdaqheader + metadata + payload)
00230 {
00231 std::stringstream errmsg;
00232 errmsg << "Error: artdaq fragment of type " <<
00233 fragment.typeString() << ", sequence ID " <<
00234 fragment.sequenceID() <<
00235 " has internally inconsistent measures of its size, signalling data corruption: in bytes," <<
00236 " total size = " << totalsize << ", artdaq fragment header = " << artdaqheader <<
00237 ", metadata = " << metadata << ", payload = " << payload;
00238
00239 TLOG(TLVL_ERROR) << errmsg.str() ;
00240
00241 if (quitOnFragmentIntegrityProblem_)
00242 {
00243 throw cet::exception("TransferWrapper") << errmsg.str();
00244 }
00245 else
00246 {
00247 return;
00248 }
00249 }
00250
00251 auto findloc = std::find(allowedFragmentTypes_.begin(), allowedFragmentTypes_.end(), static_cast<int>(type));
00252
00253 if (findloc == allowedFragmentTypes_.end())
00254 {
00255 std::stringstream errmsg;
00256 errmsg << "Error: artdaq fragment appears to have type "
00257 << type << ", not found in the allowed fragment types list";
00258
00259 TLOG(TLVL_ERROR) << errmsg.str() ;
00260 if (quitOnFragmentIntegrityProblem_)
00261 {
00262 throw cet::exception("TransferWrapper") << errmsg.str();
00263 }
00264 else
00265 {
00266 return;
00267 }
00268 }
00269 }
00270
00271 void
00272 artdaq::TransferWrapper::unregisterMonitor()
00273 {
00274 if (!monitorRegistered_)
00275 {
00276 throw cet::exception("TransferWrapper") <<
00277 "The function to unregister the monitor was called, but the monitor doesn't appear to be registered";
00278 }
00279
00280 int retry = 3;
00281 while (retry > 0) {
00282
00283 TLOG(TLVL_INFO) << "Requesting that this monitor (" << transfer_->uniqueLabel()
00284 << ") be unregistered from the dispatcher aggregator";
00285
00286 auto status = commander_->send_unregister_monitor(transfer_->uniqueLabel());
00287
00288
00289 TLOG(TLVL_INFO) << "Response from dispatcher is \""
00290 << status << "\"";
00291
00292 if (status == "Success")
00293 {
00294 monitorRegistered_ = false;
00295 break;
00296 }
00297 else if (status == "busy")
00298 { }
00299 else
00300 {
00301 throw cet::exception("TransferWrapper") << "Error in TransferWrapper: attempt to unregister with dispatcher did not result in the \"Success\" response";
00302 }
00303 retry--;
00304 }
00305 }
00306
00307
00308 artdaq::TransferWrapper::~TransferWrapper()
00309 {
00310 if (monitorRegistered_)
00311 {
00312 try
00313 {
00314 unregisterMonitor();
00315 }
00316 catch (...)
00317 {
00318 ExceptionHandler(ExceptionHandlerRethrow::no,
00319 "An exception occurred when trying to unregister monitor during TransferWrapper's destruction");
00320 }
00321 }
00322 artdaq::Globals::CleanUpGlobals();
00323 }