ATLAS Offline Software
RDataSource.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Local include(s).
6 #include "RDataSource.h"
7 
8 // xAOD include(s).
11 
12 // ROOT include(s).
13 #include <TChain.h>
14 #include <TFile.h>
15 #include <TROOT.h>
16 #include <TError.h>
17 
18 // System include(s).
19 #include <algorithm>
20 #include <memory>
21 #include <stdexcept>
22 
23 namespace {
24 
26  std::unique_ptr< TChain >
27  makeChain( const std::vector< std::string >& fileNames,
28  std::string_view treeName ) {
29 
30  // Create the chain object.
31  std::unique_ptr< TChain > chain;
32  {
33  R__LOCKGUARD( gROOTMutex );
34  chain = std::make_unique< TChain >( treeName.data() );
35  }
36 
37  // Set it up.
38  chain->ResetBit( kMustCleanup );
39  for( const std::string& fileName : fileNames ) {
40  chain->Add( fileName.c_str() );
41  }
42 
43  // Return the newly created chain.
44  return chain;
45  }
46 
47 } // private namespace
48 
49 namespace xAOD {
50 
/// Helper print operator, writing a pair as "[first, second]".
///
/// @param out The stream to write to
/// @param pair The pair to print
/// @return The stream that was passed in
template< typename FIRST, typename SECOND >
std::ostream& operator<< ( std::ostream& out,
                           const std::pair< FIRST, SECOND >& pair ) {

   // Name the two halves, then print them between square brackets.
   const auto& [ head, tail ] = pair;
   out << "[" << head << ", " << tail << "]";
   return out;
}
59 
/// Helper print operator, writing a vector as "[a, b, c]".
///
/// @param out The stream to write to
/// @param vec The vector to print
/// @return The stream that was passed in
template< typename T >
std::ostream& operator<< ( std::ostream& out, const std::vector< T >& vec ) {

   out << "[";
   // A separator goes in front of every element except the first one.
   bool needSeparator = false;
   for( const auto& element : vec ) {
      if( needSeparator ) {
         out << ", ";
      }
      out << element;
      needSeparator = true;
   }
   out << "]";
   return out;
}
74 
/// Helper print operator, writing an unordered map as "{k : v, k : v}".
///
/// The elements are printed in the iteration order of the map.
///
/// @param out The stream to write to
/// @param umap The map to print
/// @return The stream that was passed in
template< typename T >
std::ostream& operator<< ( std::ostream& out,
                           const std::unordered_map< T, T >& umap ) {

   out << "{";
   auto itr = umap.begin();
   if( itr != umap.end() ) {
      // The first element is printed without a leading separator...
      out << itr->first << " : " << itr->second;
      // ...all the following ones with one.
      for( ++itr; itr != umap.end(); ++itr ) {
         out << ", ";
         out << itr->first << " : " << itr->second;
      }
   }
   out << "}";
   return out;
}
92 
93 } // namespace xAOD
94 
// Helper macro for printing verbose messages for debugging. It relies on
// a variable called m_verboseOutput being visible where it is used, and
// writes nothing when that variable evaluates to false. MSG may be a chain
// of "<<" separated values. The do/while( false ) wrapper makes the macro
// behave as a single statement.
#define PRINT_VERBOSE(MSG)                                                 \
   do {                                                                    \
      if( ! m_verboseOutput ) {                                            \
         break;                                                            \
      }                                                                    \
      std::cout << "xAOD::RDataSource VERBOSE " << MSG << std::endl;       \
   } while( false )
102 
103 namespace xAOD {
104 
105  RDataSource::RDataSource( std::string_view fileNameGlob,
106  std::string_view treeName )
107  : RDataSource( std::vector< std::string >( { fileNameGlob.data() } ),
108  treeName ) {
109 
110  }
111 
112  RDataSource::RDataSource( const std::vector< std::string >& fileNames,
113  std::string_view treeName )
114  : m_fileNames( fileNames ), m_treeName( treeName ) {
115 
117  }
118 
120 
121  // I don't understand why, but ROOT really doesn't like it if the
122  // chains are not the first to be deleted from memory. :-/
123  m_chains.clear();
124  }
125 
126  void RDataSource::SetNSlots( unsigned int slots ) {
127 
128  // Some sanity checks.
129  if( slots == 0 ) {
130  ::Error( "xAOD::RDataSource::SetNSlots",
131  XAOD_MESSAGE( "Zero slots requested" ) );
132  throw std::invalid_argument( "Zero slots requested" );
133  }
134  if( m_events.size() != 0 ) {
135  ::Error( "xAOD::RDataSource::SetNSlots",
136  XAOD_MESSAGE( "Function called multiple times" ) );
137  throw std::runtime_error( "Function called multiple times" );
138  }
139 
140  // Reserve the correct number of elements.
141  m_chains.reserve( slots );
142  m_events.reserve( slots );
143  m_stores.reserve( slots );
144  PRINT_VERBOSE( "SetNSlots: Reserved " << slots
145  << " slots for the chains, events and stores" );
146 
147  // Create the event objects already at this point.
148  for( unsigned int i = 0; i < slots; ++i ) {
149 
150  // Set up the chain, event and store.
151  m_chains.push_back( ::makeChain( m_fileNames, m_treeName ) );
152  m_events.push_back(
153  std::make_unique< RDataSourceEvent >( m_auxmode ) );
154  m_stores.push_back( std::make_unique< TStore >() );
155  TChain* chain = m_chains.back().get();
156  RDataSourceEvent* event = m_events.back().get();
157 
158  // Initialize the event object.
159  if( ! event->readFrom( chain ).isSuccess() ) {
160  ::Error( "xAOD::RDataSource::SetNSlots",
161  XAOD_MESSAGE( "Failed to set up xAOD::RDataSourceEvent "
162  "for slot %u" ), i );
163  throw std::runtime_error( "Failed to set up "
164  "xAOD::RDataSourceEvent" );
165  }
166 
167  // Load entry 0 for it. Notice that this is a waste of CPU and I/O
168  // on the surface. But it's not... This triggers the initialization of
169  // the files/trees used by these chains. Which happens much more
170  // quickly in a serial way in a single thread than in multiple threads
171  // at the same time. To be followed up with the ROOT developers...
172  if( event->getEntry( 0 ) < 0 ) {
173  ::Error( "xAOD::RDataSource::SetNSlots",
174  XAOD_MESSAGE( "Failed to load entry 0 for slot %u" ), i );
175  throw std::runtime_error( "Failed to load entry for slot" );
176  }
177  PRINT_VERBOSE( "SetNSlots: Initialized objects for slot " << i );
178  }
179 
180 #if ROOT_VERSION_CODE >= ROOT_VERSION(6,35,99)
181  // Pass on to the base class.
182  ROOT::RDF::RDataSource::SetNSlots( slots );
183 #endif
184 
185  // Return gracefully.
186  return;
187  }
188 
190 
191  // A sanity check.
192  if( m_entryRanges.size() != 0 ) {
193  ::Fatal( "xAOD::RDataSource::Initialize",
194  XAOD_MESSAGE( "Function called on an initialized object" ) );
195  }
196  PRINT_VERBOSE( "Initialize: Initializing the data source" );
197 
198  // Create a chain that will help determine the optimal entry ranges
199  // to process.
201  TObjArray* filesInChain = chain->GetListOfFiles();
202 
203  // Loop over the input files of the chain.
204  Long64_t fileOffset = 0;
205  for( Int_t ifile = 0; ifile < filesInChain->GetEntries(); ++ifile ) {
206 
207  // Open the file directly.
208  const char* fileName = filesInChain->At( ifile )->GetTitle();
209  auto file = std::unique_ptr< TFile >( TFile::Open( fileName,
210  "READ" ) );
211  if( ( ! file ) || file->IsZombie() ) {
212  ::Error( "xAOD::RDataSource::Initialize",
213  XAOD_MESSAGE( "Failed to open file: %s" ), fileName );
214  throw std::runtime_error( "Failed to open file: " +
215  std::string( fileName ) );
216  }
217 
218  // Access the event tree inside of it.
219  TTree* tree =
220  dynamic_cast< TTree* >( file->Get( m_treeName.c_str() ) );
221  if( ! tree ) {
222  // A file with no event tree is not necessarily a problem. It could
223  // just be a file that has no events left in it after all
224  // selections.
225  continue;
226  }
227 
228  // Extract the ideal entry ranges from the file.
229  const Long64_t entries = tree->GetEntries();
230  TTree::TClusterIterator clusterIter( tree->GetClusterIterator( 0 ) );
231  Long64_t clusterStart = 0;
232  while( ( clusterStart = clusterIter() ) < entries ) {
233  m_entryRanges.emplace_back( fileOffset + clusterStart,
234  fileOffset +
235  clusterIter.GetNextEntry() );
236  }
237 
238  // Increment the file offset value.
239  fileOffset += entries;
240  }
241  PRINT_VERBOSE( "Initialize: Created entry ranges: " << m_entryRanges );
242 
243  // Return gracefully.
244  return;
245  }
246 
247  void RDataSource::InitSlot( unsigned int slot, ULong64_t firstEntry ) {
248 
249  // A sanity check.
250  if( m_events.size() <= slot ) {
251  ::Error( "xAOD::RDataSource::InitSlot",
252  XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
253  throw std::runtime_error( "Invalid slot received" );
254  }
255 
256  // Load the first entry for it.
257  if( m_events[ slot ]->getEntry( firstEntry ) < 0 ) {
258  ::Error( "xAOD::RDataSource::InitSlot",
259  XAOD_MESSAGE( "Failed to load entry %lld for slot %u" ),
260  firstEntry, slot );
261  throw std::runtime_error( "Failed to load entry for slot" );
262  }
263  PRINT_VERBOSE( "InitSlot: Retrieved entry " << firstEntry << " for slot "
264  << slot );
265 
266  // Activate and clear the store.
267  m_stores[ slot ]->setActive();
268  m_stores[ slot ]->clear();
269  PRINT_VERBOSE( "InitSlot: Activated and cleared transient store for slot "
270  << slot );
271 
272  // Return gracefully.
273  return;
274  }
275 
276  void RDataSource::FinalizeSlot( unsigned int slot ) {
277 
278  // Simply print what's happening.
279  PRINT_VERBOSE( "FinalizeSlot: Called for slot " << slot );
280 
281  // Return gracefully.
282  return;
283  }
284 
286 
287  // Simply print what's happening.
288  PRINT_VERBOSE( "Finalize: Function called" );
289 
290  // Return gracefully.
291  return;
292  }
293 
294  const std::vector< std::string >& RDataSource::GetColumnNames() const {
295 
296  return m_columnNames;
297  }
298 
299  bool RDataSource::HasColumn( std::string_view name ) const {
300 
301  return std::find( m_columnNames.begin(), m_columnNames.end(),
302  name ) != m_columnNames.end();
303  }
304 
305  std::string RDataSource::GetTypeName( std::string_view column ) const {
306 
307  // Make sure that the column/object is known.
308  if( ! HasColumn( column ) ) {
309  ::Error( "xAOD::RDataSource::GetTypeName",
310  XAOD_MESSAGE( "Column/object \"%s\" not available" ),
311  column.data() );
312  throw std::runtime_error( "Column/object \"" + std::string( column ) +
313  "\" not available" );
314  }
315 
316  // Get the type.
317  auto itr = m_classNameMap.find( column.data() );
318  if( itr == m_classNameMap.end() ) {
319  // Note that the fatal message will abort the entire job in all cases.
320  ::Fatal( "xAOD::RDataSource::GetTypeName",
321  XAOD_MESSAGE( "Internal logic error found" ) );
322  }
323  PRINT_VERBOSE( "GetTypeName: Type name for column \"" << column
324  << "\" is: " << itr->second );
325  return itr->second;
326  }
327 
329 
330  // When ROOT asks for the entry ranges, we have to tell it which ones
331  // have not been processed yet. Since we process all entries right away
332  // (SetEntry(...) does not have logic for not processing a requested
333  // entry), the logic here is to empty out the m_entryRanges variable on
334  // this call. So that on the next call an empty range would be returned.
335  const EntryRanges_t dummy( std::move( m_entryRanges ) );
336  return dummy;
337  }
338 
339  bool RDataSource::SetEntry( unsigned int slot, ULong64_t entry ) {
340 
341  // A sanity check.
342  if( m_events.size() <= slot ) {
343  ::Error( "xAOD::RDataSource::SetEntry",
344  XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
345  throw std::runtime_error( "Invalid slot received" );
346  }
347  PRINT_VERBOSE( "SetEntry: Called for slot " << slot << " and entry "
348  << entry );
349 
350  // Switch to the requested entry.
351  m_events[ slot ]->updateObjectsForEntry( entry );
352 
353  // Activate and clear the store.
354  m_stores[ slot ]->setActive();
355  m_stores[ slot ]->clear();
356 
357  // The entry is always processed.
358  return true;
359  }
360 
362 
364  return;
365  }
366 
368 
369  return m_verboseOutput;
370  }
371 
373 
374  m_auxmode = mode;
375  return;
376  }
377 
379 
380  return m_auxmode;
381  }
382 
383  RDataSource::Record_t
385  const std::type_info& typeInfo ) {
386 
387  // Make sure that the column/object is known.
388  if( ! HasColumn( column ) ) {
389  ::Error( "xAOD::RDataSource::GetColumnReadersImpl",
390  XAOD_MESSAGE( "Column/object \"%s\" not available" ),
391  column.data() );
392  throw std::runtime_error( "Column/object \"" + std::string( column ) +
393  "\" not available" );
394  }
395  PRINT_VERBOSE( "GetColumnReadersImpl: Creating column readers for \""
396  << column << "/" << SG::normalizedTypeinfoName( typeInfo )
397  << "\"" );
398 
399  // Create the column reader pointers.
400  Record_t result( m_events.size() );
401  for( size_t i = 0; i < m_events.size(); ++i ) {
402  result[ i ] = m_events[ i ]->columnReader( column, typeInfo );
403  }
404  return result;
405  }
406 
408 
409  // Create a temporary event object.
412  if( ! event.readFrom( chain.get() ).isSuccess() ) {
413  ::Error( "xAOD::RDataSource::readInputMetadata",
414  XAOD_MESSAGE( "Failed to connect to the input chain" ) );
415  throw std::runtime_error( "Failed to connect to the input chain" );
416  }
417 
418  // Load the first event of the input, if one is available.
419  if( event.getEntries() > 0 ) {
420  if( event.getEntry( 0 ) < 0 ) {
421  ::Error( "xAOD::RDataSource::readInputMetadata",
422  "Couldn't load the first event of the input" );
423  throw std::runtime_error( "Couldn't load the first event of the "
424  "input" );
425  }
426  }
427 
428  // Fill the column and type name variables.
429  m_columnNames.clear(); m_classNameMap.clear();
430  auto names = event.columnAndTypeNames();
431  m_columnNames.reserve( names.size() );
432  m_classNameMap.reserve( names.size() );
433  for( const auto& pair : names ) {
434  m_columnNames.push_back( pair.first );
435  m_classNameMap[ pair.first ] = pair.second;
436  }
437  PRINT_VERBOSE( "readInputMetadata: m_columnNames = " << m_columnNames );
438  PRINT_VERBOSE( "readInputMetadata: m_classNameMap = " << m_classNameMap );
439 
440  // ROOT memory management is weird... We must delete the chain first,
441  // before the TEvent object on top of it would be deleted...
442  chain.reset();
443 
444  // Return gracefully.
445  return;
446  }
447 
448 } // namespace xAOD
TestUtils.makeChain
def makeChain(flags, name, L1Thresholds, ChainSteps, Streams="physics:Main", Groups=["RATE:TestRateGroup", "BW:TestBW"])
Definition: TestUtils.py:36
xAOD::RDataSource::FinalizeSlot
virtual void FinalizeSlot(unsigned int slot) override final
Close the input file reading in one of the slots/threads.
Definition: RDataSource.cxx:276
xAOD::RDataSource::RDataSource
RDataSource(std::string_view fileNameGlob, std::string_view treeName="CollectionTree")
Constructor with the file name pattern.
Definition: RDataSource.cxx:105
xAOD::name
name
Definition: TriggerMenuJson_v1.cxx:29
xAOD::RDataSource::m_stores
std::vector< std::unique_ptr< TStore > > m_stores
In-memory whiteboards used during the event loop.
Definition: RDataSource.h:141
get_generator_info.result
result
Definition: get_generator_info.py:21
runLayerRecalibration.chain
chain
Definition: runLayerRecalibration.py:175
checkxAOD.fileNames
fileNames
Definition: Tools/PyUtils/bin/checkxAOD.py:79
find
std::string find(const std::string &s)
return a remapped string
Definition: hcg.cxx:138
xAOD::RDataSource::isVerboseOutput
Bool_t isVerboseOutput() const
Check whether verbose output is set up to be printed.
Definition: RDataSource.cxx:367
SG::normalizedTypeinfoName
std::string normalizedTypeinfoName(const std::type_info &info)
Convert a type_info to a normalized string representation (matching the names used in the root dictio...
Definition: normalizedTypeinfoName.cxx:120
xAOD::RDataSource::GetEntryRanges
virtual EntryRanges_t GetEntryRanges() override final
Get the entry ranges in the input file(s)
Definition: RDataSource.cxx:328
xAOD::RDataSource::m_fileNames
std::vector< std::string > m_fileNames
Files to read.
Definition: RDataSource.h:105
tree
TChain * tree
Definition: tile_monitor.h:30
TRT::Track::event
@ event
Definition: InnerDetector/InDetCalibEvent/TRT_CalibData/TRT_CalibData/TrackInfo.h:74
python.AthDsoLogger.out
out
Definition: AthDsoLogger.py:70
DeMoUpdate.column
dictionary column
Definition: DeMoUpdate.py:1110
athena.value
value
Definition: athena.py:124
xAOD
ICaloAffectedTool is abstract interface for tools checking if 4 mom is in calo affected region.
Definition: ICaloAffectedTool.h:24
XAOD_MESSAGE
#define XAOD_MESSAGE(MESSAGE)
Simple macro for printing error/verbose messages.
Definition: Control/xAODRootAccess/xAODRootAccess/tools/Message.h:19
vec
std::vector< size_t > vec
Definition: CombinationsGeneratorTest.cxx:9
xAOD::RDataSource::m_chains
std::vector< std::unique_ptr< TChain > > m_chains
Chains used in the file I/O.
Definition: RDataSource.h:137
xAOD::RDataSource::m_classNameMap
std::unordered_map< std::string, std::string > m_classNameMap
The object name -> class name map.
Definition: RDataSource.h:121
xAOD::RDataSource::m_events
std::vector< std::unique_ptr< RDataSourceEvent > > m_events
Event objects performing the file I/O.
Definition: RDataSource.h:139
xAOD::RDataSource::SetEntry
virtual bool SetEntry(unsigned int slot, ULong64_t entry) override final
Set which entry a given slot/thread should be processing.
Definition: RDataSource.cxx:339
xAOD::RDataSource::m_entryRanges
EntryRanges_t m_entryRanges
Optimal entry ranges to split the processing into.
Definition: RDataSource.h:129
xAOD::RDataSource::m_verboseOutput
Bool_t m_verboseOutput
Whether verbose output should be printed or not.
Definition: RDataSource.h:109
xAOD::RDataSource
Data source for xAOD input files.
Definition: RDataSource.h:35
xAOD::RDataSource::m_columnNames
std::vector< std::string > m_columnNames
Names of the columns/objects on the input.
Definition: RDataSource.h:119
xAOD::RDataSource::GetColumnReadersImpl
virtual Record_t GetColumnReadersImpl(std::string_view column, const std::type_info &typeInfo) override final
Return the type-erased vector of pointers to pointers to column values.
Definition: RDataSource.cxx:384
lumiFormat.i
int i
Definition: lumiFormat.py:85
vector
Definition: MultiHisto.h:13
xAOD::RDataSource::Finalize
virtual void Finalize() override final
Finalize the data source, after the event loop.
Definition: RDataSource.cxx:285
Message.h
TEvent.h
CalibDbCompareRT.dummy
dummy
Definition: CalibDbCompareRT.py:59
file
TFile * file
Definition: tile_monitor.h:29
xAOD::RDataSource::auxMode
TEvent::EAuxMode auxMode() const
Get the auxiliary access mode.
Definition: RDataSource.cxx:378
dumpFileToPlots.treeName
string treeName
Definition: dumpFileToPlots.py:19
Preparation.mode
mode
Definition: Preparation.py:107
xAOD::RDataSource::Initialize
virtual void Initialize() override final
Initialize the data source, before the start of the event loop.
Definition: RDataSource.cxx:189
xAOD::RDataSource::setAuxMode
void setAuxMode(TEvent::EAuxMode mode)
Set the auxiliary access mode.
Definition: RDataSource.cxx:372
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
xAOD::RDataSource::HasColumn
virtual bool HasColumn(std::string_view name) const override final
Check if the dataset has a certain column/object.
Definition: RDataSource.cxx:299
xAOD::RDataSource::EntryRanges_t
std::vector< std::pair< ULong64_t, ULong64_t > > EntryRanges_t
Type describing the entry ranges of the input file(s)
Definition: RDataSource.h:48
xAOD::RDataSource::readInputMetadata
void readInputMetadata()
Fill the metadata variables.
Definition: RDataSource.cxx:407
xAOD::RDataSource::setVerboseOutput
void setVerboseOutput(Bool_t value=kTRUE)
Set whether verbose output should be printed (for debugging)
Definition: RDataSource.cxx:361
PRINT_VERBOSE
#define PRINT_VERBOSE(MSG)
Helper macro for printing verbose messages for debugging.
Definition: RDataSource.cxx:96
xAOD::RDataSource::m_treeName
std::string m_treeName
Name of the event tree in the input files.
Definition: RDataSource.h:107
xAOD::TEvent::EAuxMode
EAuxMode
Auxiliary store "mode".
Definition: Control/xAODRootAccess/xAODRootAccess/TEvent.h:69
xAOD::RDataSource::m_auxmode
TEvent::EAuxMode m_auxmode
Auxiliary access mode.
Definition: RDataSource.h:111
xAOD::RDataSource::InitSlot
virtual void InitSlot(unsigned int slot, ULong64_t firstEntry) override final
Initialize one of the slots/threads.
Definition: RDataSource.cxx:247
xAOD::RDataSource::~RDataSource
~RDataSource()
Destructor.
Definition: RDataSource.cxx:119
xAOD::RDataSourceEvent
Extension to xAOD::TEvent, used by xAOD::RDataSource.
Definition: RDataSourceEvent.h:31
DeMoScan.first
bool first
Definition: DeMoScan.py:534
entries
double entries
Definition: listroot.cxx:49
xAOD::RDataSource::GetColumnNames
virtual const std::vector< std::string > & GetColumnNames() const override final
Get the column/object names for the input file(s)
Definition: RDataSource.cxx:294
ZDCMsg::Fatal
@ Fatal
Definition: ZDCMsg.h:23
xAOD::RDataSource::GetTypeName
virtual std::string GetTypeName(std::string_view column) const override final
Get the type name of a given column/object.
Definition: RDataSource.cxx:305
LArCellNtuple.ifile
string ifile
Definition: LArCellNtuple.py:133
jobOptions.fileName
fileName
Definition: jobOptions.SuperChic_ALP2.py:39
RDataSource.h
xAOD::RDataSource::SetNSlots
virtual void SetNSlots(unsigned int slots) override final
Set the number of threads/slots that the data source should use.
Definition: RDataSource.cxx:126
xAOD::operator<<
std::ostream & operator<<(std::ostream &out, const std::pair< FIRST, SECOND > &pair)
Helper print operator.
Definition: RDataSource.cxx:53