artdaq  v3_05_00
NetMonTransportService_service.cc
1 #define TRACE_NAME "NetMonTransportService"
2 
3 #include "artdaq-core/Core/SharedMemoryEventReceiver.hh"
4 #include "artdaq/ArtModules/NetMonTransportService.h"
5 #include "artdaq/DAQdata/Globals.hh"
6 #include "artdaq/DAQrate/DataSenderManager.hh"
7 
8 #include "artdaq-core/Data/Fragment.hh"
9 #include "artdaq-core/Data/RawEvent.hh"
10 #include "artdaq-core/Utilities/TimeUtils.hh"
11 #include "artdaq/DAQdata/NetMonHeader.hh"
12 
13 #include "art/Framework/Services/Registry/ActivityRegistry.h"
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib/container_algorithms.h"
16 #include "cetlib_except/exception.h"
17 #include "fhiclcpp/ParameterSet.h"
18 #include "fhiclcpp/ParameterSetRegistry.h"
19 
20 #include <TBufferFile.h>
21 #include <TClass.h>
22 
23 #include <fstream>
24 #include <iomanip>
25 #include <iostream>
26 #include <string>
27 #include <vector>
28 
// Set to a non-zero value to compile in the code in sendMessage() that writes each outgoing
// message buffer to a "sendMessage_<rank>_<pid>_<sequenceId>.bin" file.
29 #define DUMP_SEND_MESSAGE 0
// Set to a non-zero value to compile in the code in receiveMessage()/receiveInitMessage() that
// writes each received message buffer to a "receiveMessage_*.bin" / "receiveInitMessage_*.bin" file.
30 #define DUMP_RECEIVE_MESSAGE 0
31 
// File-local, default-constructed ParameterSet.
// NOTE(review): not referenced by any code visible in this file - confirm whether it is still needed.
32 static fhicl::ParameterSet empty_pset;
33 
34 NetMonTransportService::NetMonTransportService(fhicl::ParameterSet const& pset, art::ActivityRegistry&)
35  : NetMonTransportServiceInterface(), data_pset_(pset), init_received_(false), sender_ptr_(nullptr), incoming_events_(nullptr), recvd_fragments_(nullptr)
36 {
37  TLOG(TLVL_TRACE) << "NetMonTransportService CONSTRUCTOR";
38 
39  init_timeout_s_ = pset.get<double>("init_fragment_timeout_seconds", 1.0);
40 }
41 
43 
45 {
46  auto start_time = std::chrono::steady_clock::now();
47 
48  char const* artapp_env = getenv("ARTDAQ_RANK");
49  if (artapp_env != NULL && my_rank < 0)
50  my_rank = std::atoi(artapp_env);
51 
52  while (my_rank == -1 && artdaq::TimeUtils::GetElapsedTime(start_time) < init_timeout_s_)
53  {
54  usleep(1000);
55  }
56  sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
57 }
58 
60 {
61  TLOG(TLVL_INFO) << "listen() start";
62  if (!incoming_events_)
63  {
64  incoming_events_.reset(new artdaq::SharedMemoryEventReceiver(
65  data_pset_.get<int>("shared_memory_key", 0xBEE70000 + getppid()),
66  data_pset_.get<int>("broadcast_shared_memory_key", 0xCEE70000 + getppid())));
67 
68  char const* artapp_env = getenv("ARTDAQ_APPLICATION_NAME");
69  std::string artapp_str = "";
70  if (artapp_env != NULL)
71  {
72  artapp_str = std::string(artapp_env) + "_";
73  }
74 
75  TLOG(TLVL_TRACE) << "Setting app_name";
76  app_name = artapp_str + "art" + std::to_string(incoming_events_->GetMyId());
77 
78  artapp_env = getenv("ARTDAQ_RANK");
79  if (artapp_env != NULL && my_rank < 0)
80  {
81  TLOG(TLVL_TRACE) << "Setting rank from envrionment";
82  my_rank = std::atoi(artapp_env);
83  }
84  else
85  {
86  TLOG(TLVL_TRACE) << "Setting my_rank from shared memory";
87  my_rank = incoming_events_->GetRank();
88  }
89 
90  TLOG(TLVL_INFO) << "app_name is " << app_name << ", rank " << my_rank;
91  }
92  TLOG(TLVL_INFO) << "listen() end";
93  return;
94 }
95 
97 {
98  if (sender_ptr_) sender_ptr_.reset(nullptr);
99 }
100 
101 void NetMonTransportService::sendMessage(uint64_t sequenceId, uint8_t messageType, TBufferFile& msg)
102 {
103  if (sender_ptr_ == nullptr)
104  {
105  TLOG(TLVL_DEBUG) << "Reconnecting DataSenderManager";
106  connect();
107  }
108 
109 #if DUMP_SEND_MESSAGE
110  std::string fileName = "sendMessage_" + std::to_string(my_rank) + "_" + std::to_string(getpid()) + "_" +
111  std::to_string(sequenceId) + ".bin";
112  std::fstream ostream(fileName, std::ios::out | std::ios::binary);
113  ostream.write(msg.Buffer(), msg.Length());
114  ostream.close();
115 #endif
116 
117  TLOG(TLVL_DEBUG) << "Sending message with sequenceID=" << sequenceId << ", type=" << (int)messageType
118  << ", length=" << msg.Length();
119  artdaq::NetMonHeader header;
120  header.data_length = static_cast<uint64_t>(msg.Length());
121  artdaq::Fragment fragment(std::ceil(msg.Length() / static_cast<double>(sizeof(artdaq::RawDataType))), sequenceId, 0,
122  messageType, header);
123 
124  memcpy(&*fragment.dataBegin(), msg.Buffer(), msg.Length());
125  sender_ptr_->sendFragment(std::move(fragment));
126 }
127 
129 {
130  listen();
131  TLOG(TLVL_TRACE) << "receiveMessage BEGIN";
132  while (recvd_fragments_ == nullptr)
133  {
134  TLOG(TLVL_TRACE) << "receiveMessage: Waiting for available buffer";
135  bool keep_looping = true;
136  bool got_event = false;
137  while (keep_looping)
138  {
139  keep_looping = false;
140  got_event = incoming_events_->ReadyForRead();
141  if (!got_event)
142  {
143  keep_looping = true;
144  }
145  }
146 
147  TLOG(TLVL_TRACE) << "receiveMessage: Reading buffer header";
148  auto errflag = false;
149  incoming_events_->ReadHeader(errflag);
150  if (errflag)
151  { // Buffer was changed out from under reader!
152  msg = nullptr;
153  return;
154  }
155  TLOG(TLVL_TRACE) << "receiveMessage: Getting Fragment types";
156  auto fragmentTypes = incoming_events_->GetFragmentTypes(errflag);
157  if (errflag)
158  { // Buffer was changed out from under reader!
159  incoming_events_->ReleaseBuffer();
160  msg = nullptr;
161  return;
162  }
163  if (fragmentTypes.size() == 0)
164  {
165  TLOG(TLVL_ERROR) << "Event has no Fragments! Aborting!";
166  incoming_events_->ReleaseBuffer();
167  msg = nullptr;
168  return;
169  }
170  TLOG(TLVL_TRACE) << "receiveMessage: Checking first Fragment type";
171  auto firstFragmentType = *fragmentTypes.begin();
172 
173  // We return false, indicating we're done reading, if:
174  // 1) we did not obtain an event, because we timed out and were
175  // configured NOT to keep trying after a timeout, or
176  // 2) the event we read was the end-of-data marker: a null
177  // pointer
178  if (!got_event || firstFragmentType == artdaq::Fragment::EndOfDataFragmentType)
179  {
180  TLOG(TLVL_DEBUG) << "Received shutdown message, returning from receiveMessage "
181  << "(debug: got_event=" << got_event << ",fragType=" << (int)firstFragmentType
182  << ",EODFragType=" << (int)artdaq::Fragment::EndOfDataFragmentType << ")";
183  incoming_events_->ReleaseBuffer();
184  msg = nullptr;
185  return;
186  }
187  if (firstFragmentType == artdaq::Fragment::InitFragmentType)
188  {
189  TLOG(TLVL_DEBUG) << "Cannot receive InitFragments here, retrying";
190  incoming_events_->ReleaseBuffer();
191  continue;
192  }
193  // EndOfRun and EndOfSubrun Fragments are ignored in NetMonTransportService
194  else if (firstFragmentType == artdaq::Fragment::EndOfRunFragmentType ||
195  firstFragmentType == artdaq::Fragment::EndOfSubrunFragmentType)
196  {
197  TLOG(TLVL_DEBUG) << "Ignoring EndOfRun or EndOfSubrun Fragment";
198  incoming_events_->ReleaseBuffer();
199  continue;
200  }
201 
202  TLOG(TLVL_TRACE) << "receiveMessage: Getting all Fragments";
203  recvd_fragments_ = incoming_events_->GetFragmentsByType(errflag, artdaq::Fragment::InvalidFragmentType);
204  if (!recvd_fragments_)
205  {
206  TLOG(TLVL_ERROR) << "Error retrieving Fragments from shared memory! Aborting!";
207  incoming_events_->ReleaseBuffer();
208  msg = nullptr;
209  return;
210  }
211  /* Events coming out of the EventStore are not sorted but need to be
212  sorted by sequence ID before they can be passed to art.
213  */
214  std::sort(recvd_fragments_->begin(), recvd_fragments_->end(), artdaq::fragmentSequenceIDCompare);
215 
216  TLOG(TLVL_TRACE) << "receiveMessage: Releasing buffer";
217  incoming_events_->ReleaseBuffer();
218  }
219 
220  // Do not process data until Init Fragment received!
221  auto start = std::chrono::steady_clock::now();
222  while (!init_received_ && artdaq::TimeUtils::GetElapsedTime(start) < init_timeout_s_)
223  {
224  usleep(init_timeout_s_ * 1000000 / 100); // Check 100 times
225  }
226  if (!init_received_)
227  {
228  TLOG(TLVL_ERROR) << "Received data but no Init Fragment after " << init_timeout_s_ << " seconds. Art will crash.";
229  }
230 
231  TLOG(TLVL_TRACE) << "receiveMessage: Returning top Fragment";
232  artdaq::Fragment topFrag = std::move(recvd_fragments_->at(0));
233  recvd_fragments_->erase(recvd_fragments_->begin());
234  if (recvd_fragments_->size() == 0)
235  {
236  recvd_fragments_.reset(nullptr);
237  }
238 
239  TLOG(TLVL_TRACE) << "receiveMessage: Copying Fragment into TBufferFile, length="
240  << topFrag.metadata<artdaq::NetMonHeader>()->data_length;
241  auto header = topFrag.metadata<artdaq::NetMonHeader>();
242  auto buffer = static_cast<char*>(malloc(header->data_length));
243  memcpy(buffer, &*topFrag.dataBegin(), header->data_length);
244  msg = new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0);
245 
246 #if DUMP_RECEIVE_MESSAGE
247  std::string fileName = "receiveMessage_" + std::to_string(my_rank) + "_" + std::to_string(getpid()) + "_" +
248  std::to_string(topFrag.sequenceID()) + ".bin";
249  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
250  ostream.write(buffer, header->data_length);
251  ostream.close();
252 #endif
253 
254  TLOG(TLVL_TRACE) << "receiveMessage END";
255 }
256 
258 {
259  listen();
260  TLOG(TLVL_TRACE) << "receiveInitMessage BEGIN";
261  if (recvd_fragments_ == nullptr)
262  {
263  TLOG(TLVL_TRACE) << "receiveInitMessage: Waiting for available buffer";
264 
265  bool got_init = false;
266  auto errflag = false;
267  while (!got_init)
268  {
269  bool got_event = false;
270  while (!got_event)
271  {
272  got_event = incoming_events_->ReadyForRead(true);
273  }
274 
275  TLOG(TLVL_TRACE) << "receiveInitMessage: Reading buffer header";
276  incoming_events_->ReadHeader(errflag);
277  if (errflag)
278  { // Buffer was changed out from under reader!
279  TLOG(TLVL_ERROR) << "receiveInitMessage: Error receiving message!";
280  incoming_events_->ReleaseBuffer();
281  msg = nullptr;
282  return;
283  }
284  TLOG(TLVL_TRACE) << "receiveInitMessage: Getting Fragment types";
285  auto fragmentTypes = incoming_events_->GetFragmentTypes(errflag);
286  if (errflag)
287  { // Buffer was changed out from under reader!
288  incoming_events_->ReleaseBuffer();
289  msg = nullptr;
290  TLOG(TLVL_ERROR) << "receiveInitMessage: Error receiving message!";
291  return;
292  }
293  if (fragmentTypes.size() == 0)
294  {
295  TLOG(TLVL_ERROR) << "Event has no Fragments! Aborting!";
296  incoming_events_->ReleaseBuffer();
297  msg = nullptr;
298  return;
299  }
300  TLOG(TLVL_TRACE) << "receiveInitMessage: Checking first Fragment type";
301  auto firstFragmentType = *fragmentTypes.begin();
302 
303  // We return false, indicating we're done reading, if:
304  // 1) we did not obtain an event, because we timed out and were
305  // configured NOT to keep trying after a timeout, or
306  // 2) the event we read was the end-of-data marker: a null
307  // pointer
308  if (!got_event || firstFragmentType == artdaq::Fragment::EndOfDataFragmentType)
309  {
310  TLOG(TLVL_DEBUG) << "Received shutdown message, returning";
311  incoming_events_->ReleaseBuffer();
312  msg = nullptr;
313  return;
314  }
315  if (firstFragmentType != artdaq::Fragment::InitFragmentType)
316  {
317  TLOG(TLVL_WARNING) << "Did NOT receive Init Fragment as first broadcast! Type="
318  << artdaq::detail::RawFragmentHeader::SystemTypeToString(firstFragmentType);
319  incoming_events_->ReleaseBuffer();
320  }
321  got_init = true;
322  }
323  TLOG(TLVL_TRACE) << "receiveInitMessage: Getting all Fragments";
324  recvd_fragments_ = incoming_events_->GetFragmentsByType(errflag, artdaq::Fragment::InvalidFragmentType);
325  /* Events coming out of the EventStore are not sorted but need to be
326  sorted by sequence ID before they can be passed to art.
327  */
328  std::sort(recvd_fragments_->begin(), recvd_fragments_->end(), artdaq::fragmentSequenceIDCompare);
329 
330  incoming_events_->ReleaseBuffer();
331  }
332 
333  TLOG(TLVL_TRACE) << "receiveInitMessage: Returning top Fragment";
334  artdaq::Fragment topFrag = std::move(recvd_fragments_->at(0));
335  recvd_fragments_->erase(recvd_fragments_->begin());
336  if (recvd_fragments_->size() == 0)
337  {
338  recvd_fragments_.reset(nullptr);
339  }
340 
341  auto header = topFrag.metadata<artdaq::NetMonHeader>();
342  TLOG(TLVL_TRACE) << "receiveInitMessage: Copying Fragment into TBufferFile: message length: " << header->data_length;
343  auto buffer = new char[header->data_length];
344  // auto buffer = static_cast<char *>(malloc(header->data_length)); // Fix alloc-dealloc-mismatch
345  memcpy(buffer, &*topFrag.dataBegin(), header->data_length);
346 
347 #if DUMP_RECEIVE_MESSAGE
348  std::string fileName = "receiveInitMessage_" + std::to_string(getpid()) + ".bin";
349  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
350  ostream.write(buffer, header->data_length);
351  ostream.close();
352 #endif
353 
354  msg = new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0);
355 
356  TLOG(TLVL_TRACE) << "receiveInitMessage END";
357  init_received_ = true;
358 }
// art framework macro invocation; presumably registers NetMonTransportService as the
// implementation of the NetMonTransportServiceInterface service - confirm against the
// art service-macro documentation.
359 DEFINE_ART_SERVICE_INTERFACE_IMPL(NetMonTransportService, NetMonTransportServiceInterface)
void receiveInitMessage(TBufferFile *&msg) override
Receive the init message.
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured; otherwise Round-Robins Fragments to the destinations.
void sendMessage(uint64_t sequenceId, uint8_t messageType, TBufferFile &msg) override
Send ROOT data, wrapped in an artdaq::Fragment object.
NetMonTransportService extends NetMonTransportServiceInterface. It sends events using DataSenderManag...
void receiveMessage(TBufferFile *&msg) override
Receive data from the ConcurrentQueue.
Header with length information for NetMonTransport messages.
Definition: NetMonHeader.hh:14
void connect() override
Reconnect the NetMonTransportService.
virtual ~NetMonTransportService()
NetMonTransportService Destructor. Calls disconnect().
NetMonTransportService(fhicl::ParameterSet const &pset, art::ActivityRegistry &)
NetMonTransportService Constructor.
void disconnect() override
Disconnects the NetMonTransportService.
uint64_t data_length
The length of the message.
Definition: NetMonHeader.hh:16
Interface for NetMonTransportService. This interface is declared to art as part of the required regist...
void listen() override
Listen for connections. This method is a No-Op.