artdaq  v3_06_00
NetMonTransportService_service.cc
1 #define TRACE_NAME "NetMonTransportService"
2 
3 #include "artdaq-core/Core/SharedMemoryEventReceiver.hh"
4 #include "artdaq/ArtModules/NetMonTransportService.h"
5 #include "artdaq/DAQdata/Globals.hh"
6 #include "artdaq/DAQrate/DataSenderManager.hh"
7 
8 #include "artdaq-core/Data/Fragment.hh"
9 #include "artdaq-core/Data/RawEvent.hh"
10 #include "artdaq-core/Utilities/TimeUtils.hh"
11 #include "artdaq/DAQdata/NetMonHeader.hh"
12 
13 #include "art/Framework/Services/Registry/ActivityRegistry.h"
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib/container_algorithms.h"
16 #include "cetlib_except/exception.h"
17 #include "fhiclcpp/ParameterSet.h"
18 #include "fhiclcpp/ParameterSetRegistry.h"
19 
20 #include <TBufferFile.h>
21 #include <TClass.h>
22 
23 #include <fstream>
24 #include <iomanip>
25 #include <iostream>
26 #include <string>
27 #include <vector>
28 
29 #define DUMP_SEND_MESSAGE 0
30 #define DUMP_RECEIVE_MESSAGE 0
31 
32 static fhicl::ParameterSet empty_pset;
33 
34 NetMonTransportService::NetMonTransportService(fhicl::ParameterSet const& pset, art::ActivityRegistry&)
35  : NetMonTransportServiceInterface(), data_pset_(pset), init_received_(false), sender_ptr_(nullptr), incoming_events_(nullptr), recvd_fragments_(nullptr)
36 {
37  TLOG(TLVL_TRACE) << "NetMonTransportService CONSTRUCTOR" ;
38 
39  init_timeout_s_ = pset.get<double>("init_fragment_timeout_seconds", 1.0);
40 }
41 
43 
45 {
46  auto start_time = std::chrono::steady_clock::now();
47 
48  char const* artapp_env = getenv("ARTDAQ_RANK");
49  if (artapp_env != NULL && my_rank < 0)
50  my_rank = std::atoi(artapp_env);
51 
52  while (my_rank == -1 && artdaq::TimeUtils::GetElapsedTime(start_time) < init_timeout_s_)
53  {
54  usleep(1000);
55 }
56  sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
57 }
58 
60 {
61  TLOG(TLVL_INFO) << "listen() start";
62  if (!incoming_events_)
63  {
64  incoming_events_.reset(new artdaq::SharedMemoryEventReceiver(
65  data_pset_.get<int>("shared_memory_key", 0xBEE70000 + getppid()),
66  data_pset_.get<int>("broadcast_shared_memory_key", 0xCEE70000 + getppid())));
67 
68  char const* artapp_env = getenv("ARTDAQ_APPLICATION_NAME");
69  std::string artapp_str = "";
70  if (artapp_env != NULL)
71  {
72  artapp_str = std::string(artapp_env) + "_";
73  }
74 
75  TLOG(TLVL_TRACE) << "Setting app_name";
76  app_name = artapp_str + "art" + std::to_string(incoming_events_->GetMyId());
77 
78  artapp_env = getenv("ARTDAQ_RANK");
79  if (artapp_env != NULL && my_rank < 0)
80  {
81  TLOG(TLVL_TRACE) << "Setting rank from envrionment";
82  my_rank = std::atoi(artapp_env);
83  }
84  else
85  {
86  TLOG(TLVL_TRACE) << "Setting my_rank from shared memory";
87  my_rank = incoming_events_->GetRank();
88  }
89 
90  TLOG(TLVL_INFO) << "app_name is " << app_name << ", rank " << my_rank;
91  }
92  TLOG(TLVL_INFO) << "listen() end";
93  return;
94 }
95 
97 {
98  if (sender_ptr_) sender_ptr_.reset(nullptr);
99 }
100 
101 void NetMonTransportService::sendMessage(uint64_t sequenceId, uint8_t messageType, TBufferFile& msg)
102 {
103  if (sender_ptr_ == nullptr)
104  {
105  TLOG(TLVL_DEBUG) << "Reconnecting DataSenderManager" ;
106  connect();
107  }
108 
109 #if DUMP_SEND_MESSAGE
110  std::string fileName = "sendMessage_" + std::to_string(my_rank) + "_" + std::to_string(getpid()) + "_" +
111  std::to_string(sequenceId) + ".bin";
112  std::fstream ostream(fileName, std::ios::out | std::ios::binary);
113  ostream.write(msg.Buffer(), msg.Length());
114  ostream.close();
115 #endif
116 
117  TLOG(TLVL_DEBUG) << "Sending message with sequenceID=" << sequenceId << ", type=" << (int)messageType
118  << ", length=" << msg.Length();
119  artdaq::NetMonHeader header;
120  header.data_length = static_cast<uint64_t>(msg.Length());
121  artdaq::Fragment fragment(std::ceil(msg.Length() / static_cast<double>(sizeof(artdaq::RawDataType))), sequenceId, 0,
122  messageType, header);
123 
124  memcpy(&*fragment.dataBegin(), msg.Buffer(), msg.Length());
125  sender_ptr_->sendFragment(std::move(fragment));
126  // Events are unique in art, so this will be the only send with this sequence ID!
127  sender_ptr_->RemoveRoutingTableEntry(sequenceId);
128 }
129 
131 {
132  listen();
133  TLOG(TLVL_TRACE) << "receiveMessage BEGIN" ;
134  while (recvd_fragments_ == nullptr)
135  {
136  TLOG(TLVL_TRACE) << "receiveMessage: Waiting for available buffer" ;
137  bool keep_looping = true;
138  bool got_event = false;
139  while (keep_looping)
140  {
141  keep_looping = false;
142  got_event = incoming_events_->ReadyForRead();
143  if (!got_event)
144  {
145  keep_looping = true;
146  }
147  }
148 
149  TLOG(TLVL_TRACE) << "receiveMessage: Reading buffer header" ;
150  auto errflag = false;
151  incoming_events_->ReadHeader(errflag);
152  if (errflag)
153  { // Buffer was changed out from under reader!
154  msg = nullptr;
155  return;
156  }
157  TLOG(TLVL_TRACE) << "receiveMessage: Getting Fragment types" ;
158  auto fragmentTypes = incoming_events_->GetFragmentTypes(errflag);
159  if (errflag)
160  { // Buffer was changed out from under reader!
161  incoming_events_->ReleaseBuffer();
162  msg = nullptr;
163  return;
164  }
165  if (fragmentTypes.size() == 0)
166  {
167  TLOG(TLVL_ERROR) << "Event has no Fragments! Aborting!" ;
168  incoming_events_->ReleaseBuffer();
169  msg = nullptr;
170  return;
171  }
172  TLOG(TLVL_TRACE) << "receiveMessage: Checking first Fragment type" ;
173  auto firstFragmentType = *fragmentTypes.begin();
174 
175  // We return false, indicating we're done reading, if:
176  // 1) we did not obtain an event, because we timed out and were
177  // configured NOT to keep trying after a timeout, or
178  // 2) the event we read was the end-of-data marker: a null
179  // pointer
180  if (!got_event || firstFragmentType == artdaq::Fragment::EndOfDataFragmentType)
181  {
182  TLOG(TLVL_DEBUG) << "Received shutdown message, returning from receiveMessage "
183  << "(debug: got_event=" << got_event << ",fragType=" << (int)firstFragmentType
184  << ",EODFragType=" << (int)artdaq::Fragment::EndOfDataFragmentType << ")";
185  incoming_events_->ReleaseBuffer();
186  msg = nullptr;
187  return;
188  }
189  if (firstFragmentType == artdaq::Fragment::InitFragmentType)
190  {
191  TLOG(TLVL_DEBUG) << "Cannot receive InitFragments here, retrying" ;
192  incoming_events_->ReleaseBuffer();
193  continue;
194  }
195  // EndOfRun and EndOfSubrun Fragments are ignored in NetMonTransportService
196  else if (firstFragmentType == artdaq::Fragment::EndOfRunFragmentType ||
197  firstFragmentType == artdaq::Fragment::EndOfSubrunFragmentType)
198  {
199  TLOG(TLVL_DEBUG) << "Ignoring EndOfRun or EndOfSubrun Fragment" ;
200  incoming_events_->ReleaseBuffer();
201  continue;
202  }
203 
204  TLOG(TLVL_TRACE) << "receiveMessage: Getting all Fragments" ;
205  recvd_fragments_ = incoming_events_->GetFragmentsByType(errflag, artdaq::Fragment::InvalidFragmentType);
206  if (!recvd_fragments_)
207  {
208  TLOG(TLVL_ERROR) << "Error retrieving Fragments from shared memory! Aborting!";
209  incoming_events_->ReleaseBuffer();
210  msg = nullptr;
211  return;
212  }
213  /* Events coming out of the EventStore are not sorted but need to be
214  sorted by sequence ID before they can be passed to art.
215  */
216  std::sort(recvd_fragments_->begin(), recvd_fragments_->end(), artdaq::fragmentSequenceIDCompare);
217 
218  TLOG(TLVL_TRACE) << "receiveMessage: Releasing buffer" ;
219  incoming_events_->ReleaseBuffer();
220  }
221 
222  // Do not process data until Init Fragment received!
223  auto start = std::chrono::steady_clock::now();
224  while (!init_received_ && artdaq::TimeUtils::GetElapsedTime(start) < init_timeout_s_)
225  {
226  usleep(init_timeout_s_ * 1000000 / 100); // Check 100 times
227  }
228  if (!init_received_)
229  {
230  TLOG(TLVL_ERROR) << "Received data but no Init Fragment after " << init_timeout_s_ << " seconds. Art will crash." ;
231  }
232 
233  TLOG(TLVL_TRACE) << "receiveMessage: Returning top Fragment" ;
234  artdaq::Fragment topFrag = std::move(recvd_fragments_->at(0));
235  recvd_fragments_->erase(recvd_fragments_->begin());
236  if (recvd_fragments_->size() == 0)
237  {
238  recvd_fragments_.reset(nullptr);
239  }
240 
241  TLOG(TLVL_TRACE) << "receiveMessage: Copying Fragment into TBufferFile, length="
242  << topFrag.metadata<artdaq::NetMonHeader>()->data_length;
243  auto header = topFrag.metadata<artdaq::NetMonHeader>();
244  auto buffer = static_cast<char *>(malloc(header->data_length));
245  memcpy(buffer, &*topFrag.dataBegin(), header->data_length);
246  msg = new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0);
247 
248 #if DUMP_RECEIVE_MESSAGE
249  std::string fileName = "receiveMessage_" + std::to_string(my_rank) + "_" + std::to_string(getpid()) + "_" +
250  std::to_string(topFrag.sequenceID()) + ".bin";
251  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
252  ostream.write(buffer, header->data_length);
253  ostream.close();
254 #endif
255 
256  TLOG(TLVL_TRACE) << "receiveMessage END" ;
257 }
258 
260 {
261  listen();
262  TLOG(TLVL_TRACE) << "receiveInitMessage BEGIN" ;
263  if (recvd_fragments_ == nullptr)
264  {
265  TLOG(TLVL_TRACE) << "receiveInitMessage: Waiting for available buffer" ;
266 
267  bool got_init = false;
268  auto errflag = false;
269  while (!got_init)
270  {
271  bool got_event = false;
272  while (!got_event)
273  {
274  got_event = incoming_events_->ReadyForRead(true);
275  }
276 
277  TLOG(TLVL_TRACE) << "receiveInitMessage: Reading buffer header" ;
278  incoming_events_->ReadHeader(errflag);
279  if (errflag)
280  { // Buffer was changed out from under reader!
281  TLOG(TLVL_ERROR) << "receiveInitMessage: Error receiving message!" ;
282  incoming_events_->ReleaseBuffer();
283  msg = nullptr;
284  return;
285  }
286  TLOG(TLVL_TRACE) << "receiveInitMessage: Getting Fragment types" ;
287  auto fragmentTypes = incoming_events_->GetFragmentTypes(errflag);
288  if (errflag)
289  { // Buffer was changed out from under reader!
290  incoming_events_->ReleaseBuffer();
291  msg = nullptr;
292  TLOG(TLVL_ERROR) << "receiveInitMessage: Error receiving message!" ;
293  return;
294  }
295  if (fragmentTypes.size() == 0)
296  {
297  TLOG(TLVL_ERROR) << "Event has no Fragments! Aborting!" ;
298  incoming_events_->ReleaseBuffer();
299  msg = nullptr;
300  return;
301  }
302  TLOG(TLVL_TRACE) << "receiveInitMessage: Checking first Fragment type" ;
303  auto firstFragmentType = *fragmentTypes.begin();
304 
305  // We return false, indicating we're done reading, if:
306  // 1) we did not obtain an event, because we timed out and were
307  // configured NOT to keep trying after a timeout, or
308  // 2) the event we read was the end-of-data marker: a null
309  // pointer
310  if (!got_event || firstFragmentType == artdaq::Fragment::EndOfDataFragmentType)
311  {
312  TLOG(TLVL_DEBUG) << "Received shutdown message, returning" ;
313  incoming_events_->ReleaseBuffer();
314  msg = nullptr;
315  return;
316  }
317  if (firstFragmentType != artdaq::Fragment::InitFragmentType)
318  {
319  TLOG(TLVL_WARNING) << "Did NOT receive Init Fragment as first broadcast! Type="
320  << artdaq::detail::RawFragmentHeader::SystemTypeToString(firstFragmentType);
321  incoming_events_->ReleaseBuffer();
322  }
323  got_init = true;
324  }
325  TLOG(TLVL_TRACE) << "receiveInitMessage: Getting all Fragments" ;
326  recvd_fragments_ = incoming_events_->GetFragmentsByType(errflag, artdaq::Fragment::InvalidFragmentType);
327  /* Events coming out of the EventStore are not sorted but need to be
328  sorted by sequence ID before they can be passed to art.
329  */
330  std::sort(recvd_fragments_->begin(), recvd_fragments_->end(), artdaq::fragmentSequenceIDCompare);
331 
332  incoming_events_->ReleaseBuffer();
333  }
334 
335  TLOG(TLVL_TRACE) << "receiveInitMessage: Returning top Fragment" ;
336  artdaq::Fragment topFrag = std::move(recvd_fragments_->at(0));
337  recvd_fragments_->erase(recvd_fragments_->begin());
338  if (recvd_fragments_->size() == 0)
339  {
340  recvd_fragments_.reset(nullptr);
341  }
342 
343  auto header = topFrag.metadata<artdaq::NetMonHeader>();
344  TLOG(TLVL_TRACE) << "receiveInitMessage: Copying Fragment into TBufferFile: message length: " << header->data_length ;
345  auto buffer = new char[header->data_length];
346  //auto buffer = static_cast<char *>(malloc(header->data_length)); // Fix alloc-dealloc-mismatch
347  memcpy(buffer, &*topFrag.dataBegin(), header->data_length);
348 
349 #if DUMP_RECEIVE_MESSAGE
350  std::string fileName = "receiveInitMessage_" + std::to_string(getpid()) + ".bin";
351  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
352  ostream.write(buffer, header->data_length);
353  ostream.close();
354 #endif
355 
356  msg = new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0);
357 
358  TLOG(TLVL_TRACE) << "receiveInitMessage END" ;
359  init_received_ = true;
360 }
361 DEFINE_ART_SERVICE_INTERFACE_IMPL(NetMonTransportService, NetMonTransportServiceInterface)
void receiveInitMessage(TBufferFile *&msg) override
Receive the init message.
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
void sendMessage(uint64_t sequenceId, uint8_t messageType, TBufferFile &msg) override
Send ROOT data, wrapped in an artdaq::Fragment object.
NetMonTransportService extends NetMonTransportServiceInterface. It sends events using DataSenderManag...
void receiveMessage(TBufferFile *&msg) override
Receive data from the ConcurrentQueue.
Header with length information for NetMonTransport messages.
Definition: NetMonHeader.hh:14
void connect() override
Reconnect the NetMonTransportService.
virtual ~NetMonTransportService()
NetMonTransportService Destructor. Calls disconnect().
NetMonTransportService(fhicl::ParameterSet const &pset, art::ActivityRegistry &)
NetMonTransportService Constructor.
void disconnect() override
Disconnects the NetMonTransportService.
uint64_t data_length
The length of the message.
Definition: NetMonHeader.hh:16
Interface for NetMonTransportService. This interface is declared to art as part of the required regist...
void listen() override
Listen for connections. This method is a No-Op.