artdaq  v3_06_02
NetMonTransportService_service.cc
1 #define TRACE_NAME "NetMonTransportService"
2 
3 #include "artdaq-core/Core/SharedMemoryEventReceiver.hh"
4 #include "artdaq/ArtModules/NetMonTransportService.h"
5 #include "artdaq/DAQdata/Globals.hh"
6 #include "artdaq/DAQrate/DataSenderManager.hh"
7 
8 #include "artdaq-core/Data/Fragment.hh"
9 #include "artdaq-core/Data/RawEvent.hh"
10 #include "artdaq-core/Utilities/TimeUtils.hh"
11 #include "artdaq/DAQdata/NetMonHeader.hh"
12 
13 #include "art/Framework/Services/Registry/ActivityRegistry.h"
14 #include "canvas/Utilities/Exception.h"
15 #include "cetlib/container_algorithms.h"
16 #include "cetlib_except/exception.h"
17 #include "fhiclcpp/ParameterSet.h"
18 #include "fhiclcpp/ParameterSetRegistry.h"
19 
20 #include <TBufferFile.h>
21 #include <TClass.h>
22 
23 #include <fstream>
24 #include <iomanip>
25 #include <iostream>
26 #include <string>
27 #include <vector>
28 
// Compile-time switches: set to 1 to write every sent / received message to a ".bin" file.
#define DUMP_SEND_MESSAGE 0
#define DUMP_RECEIVE_MESSAGE 0

// Builds a shared-memory key: `seed` plus (partition number + 1) shifted left by 16 bits,
// plus the low 16 bits of the parent process ID.
// The argument and the whole expansion are parenthesized so the macro can be used safely
// inside larger expressions and with compound arguments.
#define build_key(seed) ((seed) + ((GetPartitionNumber() + 1) << 16) + (getppid() & 0xFFFF))
33 
// File-local, default-constructed parameter set. No code in the visible part of this file refers to it.
34 static fhicl::ParameterSet empty_pset;
35 
36 NetMonTransportService::NetMonTransportService(fhicl::ParameterSet const& pset, art::ActivityRegistry&)
37  : NetMonTransportServiceInterface(), data_pset_(pset), init_received_(false), sender_ptr_(nullptr), incoming_events_(nullptr), recvd_fragments_(nullptr)
38 {
39  TLOG(TLVL_TRACE) << "NetMonTransportService CONSTRUCTOR";
40 
41  init_timeout_s_ = pset.get<double>("init_fragment_timeout_seconds", 1.0);
42 }
43 
45 
47 {
48  auto start_time = std::chrono::steady_clock::now();
49 
50  char const* artapp_env = getenv("ARTDAQ_RANK");
51  if (artapp_env != NULL && my_rank < 0)
52  my_rank = std::atoi(artapp_env);
53 
54  while (my_rank == -1 && artdaq::TimeUtils::GetElapsedTime(start_time) < init_timeout_s_)
55  {
56  usleep(1000);
57  }
58  sender_ptr_.reset(new artdaq::DataSenderManager(data_pset_));
59 }
60 
62 {
63  TLOG(TLVL_TRACE) << "listen() start";
64  if (!incoming_events_)
65  {
66  incoming_events_.reset(new artdaq::SharedMemoryEventReceiver(
67  data_pset_.get<int>("shared_memory_key", build_key(0xEE000000)),
68  data_pset_.get<int>("broadcast_shared_memory_key", build_key(0xBB000000))));
69 
70  char const* artapp_env = getenv("ARTDAQ_APPLICATION_NAME");
71  std::string artapp_str = "";
72  if (artapp_env != NULL)
73  {
74  artapp_str = std::string(artapp_env) + "_";
75  }
76 
77  TLOG(TLVL_TRACE) << "Setting app_name";
78  app_name = artapp_str + "art" + std::to_string(incoming_events_->GetMyId());
79 
80  artapp_env = getenv("ARTDAQ_RANK");
81  if (artapp_env != NULL && my_rank < 0)
82  {
83  TLOG(TLVL_TRACE) << "Setting rank from envrionment";
84  my_rank = std::atoi(artapp_env);
85  }
86  else
87  {
88  TLOG(TLVL_TRACE) << "Setting my_rank from shared memory";
89  my_rank = incoming_events_->GetRank();
90  }
91 
92  TLOG(TLVL_INFO) << "app_name is " << app_name << ", rank " << my_rank;
93  }
94  TLOG(TLVL_TRACE) << "listen() end";
95  return;
96 }
97 
99 {
100  if (sender_ptr_) sender_ptr_.reset(nullptr);
101 }
102 
103 void NetMonTransportService::sendMessage(uint64_t sequenceId, uint8_t messageType, TBufferFile& msg)
104 {
105  if (sender_ptr_ == nullptr)
106  {
107  TLOG(TLVL_DEBUG) << "Reconnecting DataSenderManager";
108  connect();
109  }
110 
111 #if DUMP_SEND_MESSAGE
112  std::string fileName = "sendMessage_" + std::to_string(my_rank) + "_" + std::to_string(getpid()) + "_" +
113  std::to_string(sequenceId) + ".bin";
114  std::fstream ostream(fileName, std::ios::out | std::ios::binary);
115  ostream.write(msg.Buffer(), msg.Length());
116  ostream.close();
117 #endif
118 
119  TLOG(TLVL_DEBUG) << "Sending message with sequenceID=" << sequenceId << ", type=" << (int)messageType
120  << ", length=" << msg.Length();
121  artdaq::NetMonHeader header;
122  header.data_length = static_cast<uint64_t>(msg.Length());
123  artdaq::Fragment fragment(std::ceil(msg.Length() / static_cast<double>(sizeof(artdaq::RawDataType))), sequenceId, 0,
124  messageType, header);
125 
126  memcpy(&*fragment.dataBegin(), msg.Buffer(), msg.Length());
127  sender_ptr_->sendFragment(std::move(fragment));
128  // Events are unique in art, so this will be the only send with this sequence ID!
129  sender_ptr_->RemoveRoutingTableEntry(sequenceId);
130 }
131 
133 {
134  listen();
135  TLOG(TLVL_TRACE) << "receiveMessage BEGIN";
136  while (recvd_fragments_ == nullptr)
137  {
138  TLOG(TLVL_TRACE) << "receiveMessage: Waiting for available buffer";
139  bool keep_looping = true;
140  bool got_event = false;
141  while (keep_looping)
142  {
143  keep_looping = false;
144  got_event = incoming_events_->ReadyForRead();
145  if (!got_event)
146  {
147  keep_looping = true;
148  }
149  }
150 
151  TLOG(TLVL_TRACE) << "receiveMessage: Reading buffer header";
152  auto errflag = false;
153  incoming_events_->ReadHeader(errflag);
154  if (errflag)
155  { // Buffer was changed out from under reader!
156  msg = nullptr;
157  return;
158  }
159  TLOG(TLVL_TRACE) << "receiveMessage: Getting Fragment types";
160  auto fragmentTypes = incoming_events_->GetFragmentTypes(errflag);
161  if (errflag)
162  { // Buffer was changed out from under reader!
163  incoming_events_->ReleaseBuffer();
164  msg = nullptr;
165  return;
166  }
167  if (fragmentTypes.size() == 0)
168  {
169  TLOG(TLVL_ERROR) << "Event has no Fragments! Aborting!";
170  incoming_events_->ReleaseBuffer();
171  msg = nullptr;
172  return;
173  }
174  TLOG(TLVL_TRACE) << "receiveMessage: Checking first Fragment type";
175  auto firstFragmentType = *fragmentTypes.begin();
176 
177  // We return false, indicating we're done reading, if:
178  // 1) we did not obtain an event, because we timed out and were
179  // configured NOT to keep trying after a timeout, or
180  // 2) the event we read was the end-of-data marker: a null
181  // pointer
182  if (!got_event || firstFragmentType == artdaq::Fragment::EndOfDataFragmentType)
183  {
184  TLOG(TLVL_DEBUG) << "Received shutdown message, returning from receiveMessage "
185  << "(debug: got_event=" << got_event << ",fragType=" << (int)firstFragmentType
186  << ",EODFragType=" << (int)artdaq::Fragment::EndOfDataFragmentType << ")";
187  incoming_events_->ReleaseBuffer();
188  msg = nullptr;
189  return;
190  }
191  if (firstFragmentType == artdaq::Fragment::InitFragmentType)
192  {
193  TLOG(TLVL_DEBUG) << "Cannot receive InitFragments here, retrying";
194  incoming_events_->ReleaseBuffer();
195  continue;
196  }
197  // EndOfRun and EndOfSubrun Fragments are ignored in NetMonTransportService
198  else if (firstFragmentType == artdaq::Fragment::EndOfRunFragmentType ||
199  firstFragmentType == artdaq::Fragment::EndOfSubrunFragmentType)
200  {
201  TLOG(TLVL_DEBUG) << "Ignoring EndOfRun or EndOfSubrun Fragment";
202  incoming_events_->ReleaseBuffer();
203  continue;
204  }
205 
206  TLOG(TLVL_TRACE) << "receiveMessage: Getting all Fragments";
207  recvd_fragments_ = incoming_events_->GetFragmentsByType(errflag, artdaq::Fragment::InvalidFragmentType);
208  if (!recvd_fragments_)
209  {
210  TLOG(TLVL_ERROR) << "Error retrieving Fragments from shared memory! Aborting!";
211  incoming_events_->ReleaseBuffer();
212  msg = nullptr;
213  return;
214  }
215  /* Events coming out of the EventStore are not sorted but need to be
216  sorted by sequence ID before they can be passed to art.
217  */
218  std::sort(recvd_fragments_->begin(), recvd_fragments_->end(), artdaq::fragmentSequenceIDCompare);
219 
220  TLOG(TLVL_TRACE) << "receiveMessage: Releasing buffer";
221  incoming_events_->ReleaseBuffer();
222  }
223 
224  // Do not process data until Init Fragment received!
225  auto start = std::chrono::steady_clock::now();
226  while (!init_received_ && artdaq::TimeUtils::GetElapsedTime(start) < init_timeout_s_)
227  {
228  usleep(init_timeout_s_ * 1000000 / 100); // Check 100 times
229  }
230  if (!init_received_)
231  {
232  TLOG(TLVL_ERROR) << "Received data but no Init Fragment after " << init_timeout_s_ << " seconds. Art will crash.";
233  }
234 
235  TLOG(TLVL_TRACE) << "receiveMessage: Returning top Fragment";
236  artdaq::Fragment topFrag = std::move(recvd_fragments_->at(0));
237  recvd_fragments_->erase(recvd_fragments_->begin());
238  if (recvd_fragments_->size() == 0)
239  {
240  recvd_fragments_.reset(nullptr);
241  }
242 
243  TLOG(TLVL_TRACE) << "receiveMessage: Copying Fragment into TBufferFile, length="
244  << topFrag.metadata<artdaq::NetMonHeader>()->data_length;
245  auto header = topFrag.metadata<artdaq::NetMonHeader>();
246  auto buffer = static_cast<char*>(malloc(header->data_length));
247  memcpy(buffer, &*topFrag.dataBegin(), header->data_length);
248  msg = new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0);
249 
250 #if DUMP_RECEIVE_MESSAGE
251  std::string fileName = "receiveMessage_" + std::to_string(my_rank) + "_" + std::to_string(getpid()) + "_" +
252  std::to_string(topFrag.sequenceID()) + ".bin";
253  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
254  ostream.write(buffer, header->data_length);
255  ostream.close();
256 #endif
257 
258  TLOG(TLVL_TRACE) << "receiveMessage END";
259 }
260 
262 {
263  listen();
264  TLOG(TLVL_TRACE) << "receiveInitMessage BEGIN";
265  if (recvd_fragments_ == nullptr)
266  {
267  TLOG(TLVL_TRACE) << "receiveInitMessage: Waiting for available buffer";
268 
269  bool got_init = false;
270  auto errflag = false;
271  while (!got_init)
272  {
273  bool got_event = false;
274  while (!got_event)
275  {
276  got_event = incoming_events_->ReadyForRead(true);
277  }
278 
279  TLOG(TLVL_TRACE) << "receiveInitMessage: Reading buffer header";
280  incoming_events_->ReadHeader(errflag);
281  if (errflag)
282  { // Buffer was changed out from under reader!
283  TLOG(TLVL_ERROR) << "receiveInitMessage: Error receiving message!";
284  incoming_events_->ReleaseBuffer();
285  msg = nullptr;
286  return;
287  }
288  TLOG(TLVL_TRACE) << "receiveInitMessage: Getting Fragment types";
289  auto fragmentTypes = incoming_events_->GetFragmentTypes(errflag);
290  if (errflag)
291  { // Buffer was changed out from under reader!
292  incoming_events_->ReleaseBuffer();
293  msg = nullptr;
294  TLOG(TLVL_ERROR) << "receiveInitMessage: Error receiving message!";
295  return;
296  }
297  if (fragmentTypes.size() == 0)
298  {
299  TLOG(TLVL_ERROR) << "Event has no Fragments! Aborting!";
300  incoming_events_->ReleaseBuffer();
301  msg = nullptr;
302  return;
303  }
304  TLOG(TLVL_TRACE) << "receiveInitMessage: Checking first Fragment type";
305  auto firstFragmentType = *fragmentTypes.begin();
306 
307  // We return false, indicating we're done reading, if:
308  // 1) we did not obtain an event, because we timed out and were
309  // configured NOT to keep trying after a timeout, or
310  // 2) the event we read was the end-of-data marker: a null
311  // pointer
312  if (!got_event || firstFragmentType == artdaq::Fragment::EndOfDataFragmentType)
313  {
314  TLOG(TLVL_DEBUG) << "Received shutdown message, returning";
315  incoming_events_->ReleaseBuffer();
316  msg = nullptr;
317  return;
318  }
319  if (firstFragmentType != artdaq::Fragment::InitFragmentType)
320  {
321  TLOG(TLVL_WARNING) << "Did NOT receive Init Fragment as first broadcast! Type="
322  << artdaq::detail::RawFragmentHeader::SystemTypeToString(firstFragmentType);
323  incoming_events_->ReleaseBuffer();
324  }
325  got_init = true;
326  }
327  TLOG(TLVL_TRACE) << "receiveInitMessage: Getting all Fragments";
328  recvd_fragments_ = incoming_events_->GetFragmentsByType(errflag, artdaq::Fragment::InvalidFragmentType);
329  /* Events coming out of the EventStore are not sorted but need to be
330  sorted by sequence ID before they can be passed to art.
331  */
332  std::sort(recvd_fragments_->begin(), recvd_fragments_->end(), artdaq::fragmentSequenceIDCompare);
333 
334  incoming_events_->ReleaseBuffer();
335  }
336 
337  TLOG(TLVL_TRACE) << "receiveInitMessage: Returning top Fragment";
338  artdaq::Fragment topFrag = std::move(recvd_fragments_->at(0));
339  recvd_fragments_->erase(recvd_fragments_->begin());
340  if (recvd_fragments_->size() == 0)
341  {
342  recvd_fragments_.reset(nullptr);
343  }
344 
345  auto header = topFrag.metadata<artdaq::NetMonHeader>();
346  TLOG(TLVL_TRACE) << "receiveInitMessage: Copying Fragment into TBufferFile: message length: " << header->data_length;
347  auto buffer = new char[header->data_length];
348  // auto buffer = static_cast<char *>(malloc(header->data_length)); // Fix alloc-dealloc-mismatch
349  memcpy(buffer, &*topFrag.dataBegin(), header->data_length);
350 
351 #if DUMP_RECEIVE_MESSAGE
352  std::string fileName = "receiveInitMessage_" + std::to_string(getpid()) + ".bin";
353  std::fstream ostream(fileName.c_str(), std::ios::out | std::ios::binary);
354  ostream.write(buffer, header->data_length);
355  ostream.close();
356 #endif
357 
358  msg = new TBufferFile(TBuffer::kRead, header->data_length, buffer, kTRUE, 0);
359 
360  TLOG(TLVL_TRACE) << "receiveInitMessage END";
361  init_received_ = true;
362 }
// Invokes the DEFINE_ART_SERVICE_INTERFACE_IMPL macro (defined outside this file) with
// NetMonTransportService as the implementation and NetMonTransportServiceInterface as the
// interface; presumably this is what registers the service with art -- TODO confirm.
363 DEFINE_ART_SERVICE_INTERFACE_IMPL(NetMonTransportService, NetMonTransportServiceInterface)
void receiveInitMessage(TBufferFile *&msg) override
Receive the init message.
Sends Fragment objects using TransferInterface plugins. Uses Routing Tables if configured, otherwise will Round-Robin Fragments to the destinations.
void sendMessage(uint64_t sequenceId, uint8_t messageType, TBufferFile &msg) override
Send ROOT data, wrapped in an artdaq::Fragment object.
NetMonTransportService extends NetMonTransportServiceInterface. It sends events using DataSenderManag...
void receiveMessage(TBufferFile *&msg) override
Receive data from the ConcurrentQueue.
Header with length information for NetMonTransport messages.
Definition: NetMonHeader.hh:13
void connect() override
Reconnect the NetMonTransportService.
virtual ~NetMonTransportService()
NetMonTransportService Destructor. Calls disconnect().
NetMonTransportService(fhicl::ParameterSet const &pset, art::ActivityRegistry &)
NetMonTransportService Constructor.
void disconnect() override
Disconnects the NetMonTransportService.
uint64_t data_length
The length of the message.
Definition: NetMonHeader.hh:15
Interface for NetMonTransportService. This interface is declared to art as part of the required regist...
void listen() override
Listen for connections. This method is a No-Op.