Loading [MathJax]/extensions/tex2jax.js
ATLAS Offline Software
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
RDataSource.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Local include(s).
6 #include "RDataSource.h"
7 
8 // xAOD include(s).
11 
12 // ROOT include(s).
13 #include <TChain.h>
14 #include <TFile.h>
15 #include <TROOT.h>
16 #include <TError.h>
17 
18 // System include(s).
19 #include <algorithm>
20 #include <memory>
21 #include <stdexcept>
22 
23 namespace {
24 
26  std::unique_ptr< TChain >
27  makeChain( const std::vector< std::string >& fileNames,
28  std::string_view treeName ) {
29 
30  // Create the chain object.
31  std::unique_ptr< TChain > chain;
32  {
33  R__LOCKGUARD( gROOTMutex );
34  chain = std::make_unique< TChain >( treeName.data() );
35  }
36 
37  // Set it up.
38  chain->ResetBit( kMustCleanup );
39  for( const std::string& fileName : fileNames ) {
40  chain->Add( fileName.c_str() );
41  }
42 
43  // Return the newly created chain.
44  return chain;
45  }
46 
47 } // private namespace
48 
namespace xAOD {

   /// Helper print operator for pairs, printing them as "[first, second]"
   ///
   /// @param out  The stream to print to
   /// @param pair The pair to print
   /// @return The stream that was printed to
   ///
   template< typename FIRST, typename SECOND >
   std::ostream& operator<< ( std::ostream& out,
                              const std::pair< FIRST, SECOND >& pair ) {

      out << "[" << pair.first << ", " << pair.second << "]";
      return out;
   }

   /// Helper print operator for vectors, printing them as "[a, b, c]"
   ///
   /// @param out The stream to print to
   /// @param vec The vector to print
   /// @return The stream that was printed to
   ///
   template< typename T >
   std::ostream& operator<< ( std::ostream& out, const std::vector< T >& vec ) {

      out << "[";
      for( size_t i = 0; i < vec.size(); ++i ) {
         out << vec[ i ];
         // Only put a separator between elements, not after the last one.
         if( i + 1 < vec.size() ) {
            out << ", ";
         }
      }
      out << "]";
      return out;
   }

   /// Helper print operator for unordered maps, printing them as
   /// "{key : value, ...}"
   ///
   /// The key and mapped types are allowed to differ. The elements are
   /// printed in the iteration order of the map.
   ///
   /// @param out  The stream to print to
   /// @param umap The map to print
   /// @return The stream that was printed to
   ///
   template< typename KEY, typename VALUE >
   std::ostream& operator<< ( std::ostream& out,
                              const std::unordered_map< KEY, VALUE >& umap ) {

      out << "{";
      bool first = true;
      for( const auto& upair : umap ) {
         // Only put a separator between elements, not before the first one.
         if( ! first ) {
            out << ", ";
         }
         out << upair.first << " : " << upair.second;
         first = false;
      }
      out << "}";
      return out;
   }

} // namespace xAOD
94 
/// Helper macro for printing verbose messages for debugging
///
/// It must be expanded in a scope where @c m_verboseOutput is visible. The
/// message may be a chain of values separated by "<<". Nothing is printed
/// when @c m_verboseOutput evaluates to false.
#define PRINT_VERBOSE(MSG)                                                \
   do {                                                                   \
      if( ! m_verboseOutput ) {                                           \
         break;                                                           \
      }                                                                   \
      std::cout << "xAOD::RDataSource VERBOSE " << MSG << std::endl;      \
   } while( false )
102 
103 namespace xAOD {
104 
105  RDataSource::RDataSource( std::string_view fileNameGlob,
106  std::string_view treeName )
107  : RDataSource( std::vector< std::string >( { fileNameGlob.data() } ),
108  treeName ) {
109 
110  }
111 
112  RDataSource::RDataSource( const std::vector< std::string >& fileNames,
113  std::string_view treeName )
114  : m_fileNames( fileNames ), m_treeName( treeName ),
115  m_verboseOutput( kFALSE ) {
116 
118  }
119 
121 
122  // I don't understand why, but ROOT really doesn't like it if the
123  // chains are not the first to be deleted from memory. :-/
124  m_chains.clear();
125  }
126 
127  void RDataSource::SetNSlots( unsigned int slots ) {
128 
129  // Some sanity checks.
130  if( slots == 0 ) {
131  ::Error( "xAOD::RDataSource::SetNSlots",
132  XAOD_MESSAGE( "Zero slots requested" ) );
133  throw std::invalid_argument( "Zero slots requested" );
134  }
135  if( m_events.size() != 0 ) {
136  ::Error( "xAOD::RDataSource::SetNSlots",
137  XAOD_MESSAGE( "Function called multiple times" ) );
138  throw std::runtime_error( "Function called multiple times" );
139  }
140 
141  // Reserve the correct number of elements.
142  m_chains.reserve( slots );
143  m_events.reserve( slots );
144  m_stores.reserve( slots );
145  PRINT_VERBOSE( "SetNSlots: Reserved " << slots
146  << " slots for the chains, events and stores" );
147 
148  // Create the event objects already at this point.
149  for( unsigned int i = 0; i < slots; ++i ) {
150 
151  // Set up the chain, event and store.
152  m_chains.push_back( ::makeChain( m_fileNames, m_treeName ) );
153  m_events.push_back( std::make_unique< RDataSourceEvent >() );
154  m_stores.push_back( std::make_unique< TStore >() );
155  TChain* chain = m_chains.back().get();
156  RDataSourceEvent* event = m_events.back().get();
157 
158  // Initialize the event object.
159  if( ! event->readFrom( chain ).isSuccess() ) {
160  ::Error( "xAOD::RDataSource::SetNSlots",
161  XAOD_MESSAGE( "Failed to set up xAOD::RDataSourceEvent "
162  "for slot %u" ), i );
163  throw std::runtime_error( "Failed to set up "
164  "xAOD::RDataSourceEvent" );
165  }
166 
167  // Load entry 0 for it. Notice that this is a waste of CPU and I/O
168  // on the surface. But it's not... This triggers the initialization of
169  // the files/trees used by these chains. Which happens much more
170  // quickly in a serial way in a single thread than in multiple threads
171  // at the same time. To be followed up with the ROOT developers...
172  if( event->getEntry( 0 ) < 0 ) {
173  ::Error( "xAOD::RDataSource::SetNSlots",
174  XAOD_MESSAGE( "Failed to load entry 0 for slot %u" ), i );
175  throw std::runtime_error( "Failed to load entry for slot" );
176  }
177  PRINT_VERBOSE( "SetNSlots: Initialized objects for slot " << i );
178  }
179 
180 #if ROOT_VERSION_CODE >= ROOT_VERSION(6,35,99)
181  // Pass on to the base class.
182  ROOT::RDF::RDataSource::SetNSlots( slots );
183 #endif
184 
185  // Return gracefully.
186  return;
187  }
188 
189 #if ROOT_VERSION_CODE < ROOT_VERSION(6,28,00)
190  void RDataSource::Initialise() {
191 #else
193 #endif
194 
195  // A sanity check.
196  if( m_entryRanges.size() != 0 ) {
197  ::Fatal( "xAOD::RDataSource::Initialize",
198  XAOD_MESSAGE( "Function called on an initialized object" ) );
199  }
200  PRINT_VERBOSE( "Initialize: Initializing the data source" );
201 
202  // Create a chain that will help determine the optimal entry ranges
203  // to process.
205  TObjArray* filesInChain = chain->GetListOfFiles();
206 
207  // Loop over the input files of the chain.
208  Long64_t fileOffset = 0;
209  for( Int_t ifile = 0; ifile < filesInChain->GetEntries(); ++ifile ) {
210 
211  // Open the file directly.
212  const char* fileName = filesInChain->At( ifile )->GetTitle();
213  auto file = std::unique_ptr< TFile >( TFile::Open( fileName,
214  "READ" ) );
215  if( ( ! file ) || file->IsZombie() ) {
216  ::Error( "xAOD::RDataSource::Initialize",
217  XAOD_MESSAGE( "Failed to open file: %s" ), fileName );
218  throw std::runtime_error( "Failed to open file: " +
219  std::string( fileName ) );
220  }
221 
222  // Access the event tree inside of it.
223  TTree* tree =
224  dynamic_cast< TTree* >( file->Get( m_treeName.c_str() ) );
225  if( ! tree ) {
226  // A file with no event tree is not necessarily a problem. It could
227  // just be a file that has no events left in it after all
228  // selections.
229  continue;
230  }
231 
232  // Extract the ideal entry ranges from the file.
233  const Long64_t entries = tree->GetEntries();
234  TTree::TClusterIterator clusterIter( tree->GetClusterIterator( 0 ) );
235  Long64_t clusterStart = 0;
236  while( ( clusterStart = clusterIter() ) < entries ) {
237  m_entryRanges.emplace_back( fileOffset + clusterStart,
238  fileOffset +
239  clusterIter.GetNextEntry() );
240  }
241 
242  // Increment the file offset value.
243  fileOffset += entries;
244  }
245  PRINT_VERBOSE( "Initialize: Created entry ranges: " << m_entryRanges );
246 
247  // Return gracefully.
248  return;
249  }
250 
251  void RDataSource::InitSlot( unsigned int slot, ULong64_t firstEntry ) {
252 
253  // A sanity check.
254  if( m_events.size() <= slot ) {
255  ::Error( "xAOD::RDataSource::InitSlot",
256  XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
257  throw std::runtime_error( "Invalid slot received" );
258  }
259 
260  // Load the first entry for it.
261  if( m_events[ slot ]->getEntry( firstEntry ) < 0 ) {
262  ::Error( "xAOD::RDataSource::InitSlot",
263  XAOD_MESSAGE( "Failed to load entry %lld for slot %u" ),
264  firstEntry, slot );
265  throw std::runtime_error( "Failed to load entry for slot" );
266  }
267  PRINT_VERBOSE( "InitSlot: Retrieved entry " << firstEntry << " for slot "
268  << slot );
269 
270  // Activate and clear the store.
271  m_stores[ slot ]->setActive();
272  m_stores[ slot ]->clear();
273  PRINT_VERBOSE( "InitSlot: Activated and cleared transient store for slot "
274  << slot );
275 
276  // Return gracefully.
277  return;
278  }
279 
280 #if ROOT_VERSION_CODE < ROOT_VERSION(6,28,00)
281  void RDataSource::FinaliseSlot( unsigned int slot ) {
282 
283  // Simply print what's happening.
284  PRINT_VERBOSE( "FinaliseSlot: Called for slot " << slot );
285 
286  // Return gracefully.
287  return;
288  }
289 
290  void RDataSource::Finalise() {
291 
292  // Simply print what's happening.
293  PRINT_VERBOSE( "Finalise: Function called" );
294 
295  // Return gracefully.
296  return;
297  }
298 #else
299  void RDataSource::FinalizeSlot( unsigned int slot ) {
300 
301  // Simply print what's happening.
302  PRINT_VERBOSE( "FinalizeSlot: Called for slot " << slot );
303 
304  // Return gracefully.
305  return;
306  }
307 
309 
310  // Simply print what's happening.
311  PRINT_VERBOSE( "Finalize: Function called" );
312 
313  // Return gracefully.
314  return;
315  }
316 #endif
317 
318  const std::vector< std::string >& RDataSource::GetColumnNames() const {
319 
320  return m_columnNames;
321  }
322 
323  bool RDataSource::HasColumn( std::string_view name ) const {
324 
325  return std::find( m_columnNames.begin(), m_columnNames.end(),
326  name ) != m_columnNames.end();
327  }
328 
329  std::string RDataSource::GetTypeName( std::string_view column ) const {
330 
331  // Make sure that the column/object is known.
332  if( ! HasColumn( column ) ) {
333  ::Error( "xAOD::RDataSource::GetTypeName",
334  XAOD_MESSAGE( "Column/object \"%s\" not available" ),
335  column.data() );
336  throw std::runtime_error( "Column/object \"" + std::string( column ) +
337  "\" not available" );
338  }
339 
340  // Get the type.
341  auto itr = m_classNameMap.find( column.data() );
342  if( itr == m_classNameMap.end() ) {
343  // Note that the fatal message will abort the entire job in all cases.
344  ::Fatal( "xAOD::RDataSource::GetTypeName",
345  XAOD_MESSAGE( "Internal logic error found" ) );
346  }
347  PRINT_VERBOSE( "GetTypeName: Type name for column \"" << column
348  << "\" is: " << itr->second );
349  return itr->second;
350  }
351 
353 
354  // When ROOT asks for the entry ranges, we have to tell it which ones
355  // have not been processed yet. Since we process all entries right away
356  // (SetEntry(...) does not have logic for not processing a requested
357  // entry), the logic here is to empty out the m_entryRanges variable on
358  // this call. So that on the next call an empty range would be returned.
359  const EntryRanges_t dummy( std::move( m_entryRanges ) );
360  return dummy;
361  }
362 
363  bool RDataSource::SetEntry( unsigned int slot, ULong64_t entry ) {
364 
365  // A sanity check.
366  if( m_events.size() <= slot ) {
367  ::Error( "xAOD::RDataSource::SetEntry",
368  XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
369  throw std::runtime_error( "Invalid slot received" );
370  }
371  PRINT_VERBOSE( "SetEntry: Called for slot " << slot << " and entry "
372  << entry );
373 
374  // Switch to the requested entry.
375  m_events[ slot ]->updateObjectsForEntry( entry );
376 
377  // Activate and clear the store.
378  m_stores[ slot ]->setActive();
379  m_stores[ slot ]->clear();
380 
381  // The entry is always processed.
382  return true;
383  }
384 
386 
388  return;
389  }
390 
392 
393  return m_verboseOutput;
394  }
395 
396  RDataSource::Record_t
398  const std::type_info& typeInfo ) {
399 
400  // Make sure that the column/object is known.
401  if( ! HasColumn( column ) ) {
402  ::Error( "xAOD::RDataSource::GetColumnReadersImpl",
403  XAOD_MESSAGE( "Column/object \"%s\" not available" ),
404  column.data() );
405  throw std::runtime_error( "Column/object \"" + std::string( column ) +
406  "\" not available" );
407  }
408  PRINT_VERBOSE( "GetColumnReadersImpl: Creating column readers for \""
409  << column << "/" << SG::normalizedTypeinfoName( typeInfo )
410  << "\"" );
411 
412  // Create the column reader pointers.
413  Record_t result( m_events.size() );
414  for( size_t i = 0; i < m_events.size(); ++i ) {
415  result[ i ] = m_events[ i ]->columnReader( column, typeInfo );
416  }
417  return result;
418  }
419 
421 
422  // Create a temporary event object.
425  if( ! event.readFrom( chain.get() ).isSuccess() ) {
426  ::Error( "xAOD::RDataSource::readInputMetadata",
427  XAOD_MESSAGE( "Failed to connect to the input chain" ) );
428  throw std::runtime_error( "Failed to connect to the input chain" );
429  }
430 
431  // Load the first event of the input, if one is available.
432  if( event.getEntries() > 0 ) {
433  if( event.getEntry( 0 ) < 0 ) {
434  ::Error( "xAOD::RDataSource::readInputMetadata",
435  "Couldn't load the first event of the input" );
436  throw std::runtime_error( "Couldn't load the first event of the "
437  "input" );
438  }
439  }
440 
441  // Fill the column and type name variables.
442  m_columnNames.clear(); m_classNameMap.clear();
443  auto names = event.columnAndTypeNames();
444  m_columnNames.reserve( names.size() );
445  m_classNameMap.reserve( names.size() );
446  for( const auto& pair : names ) {
447  m_columnNames.push_back( pair.first );
448  m_classNameMap[ pair.first ] = pair.second;
449  }
450  PRINT_VERBOSE( "readInputMetadata: m_columnNames = " << m_columnNames );
451  PRINT_VERBOSE( "readInputMetadata: m_classNameMap = " << m_classNameMap );
452 
453  // ROOT memory management is weird... We must delete the chain first,
454  // before the TEvent object on top of it would be deleted...
455  chain.reset();
456 
457  // Return gracefully.
458  return;
459  }
460 
461 } // namespace xAOD
TestUtils.makeChain
def makeChain(flags, name, L1Thresholds, ChainSteps, Streams="physics:Main", Groups=["RATE:TestRateGroup", "BW:TestBW"])
Definition: TestUtils.py:34
xAOD::RDataSource::FinalizeSlot
virtual void FinalizeSlot(unsigned int slot) override final
Close the input file reading in one of the slots/threads.
Definition: RDataSource.cxx:299
xAOD::RDataSource::RDataSource
RDataSource(std::string_view fileNameGlob, std::string_view treeName="CollectionTree")
Constructor with the file name pattern.
Definition: RDataSource.cxx:105
xAOD::name
name
Definition: TriggerMenuJson_v1.cxx:29
xAOD::RDataSource::m_stores
std::vector< std::unique_ptr< TStore > > m_stores
In-memory whiteboards used during the event loop.
Definition: RDataSource.h:145
get_generator_info.result
result
Definition: get_generator_info.py:21
runLayerRecalibration.chain
chain
Definition: runLayerRecalibration.py:175
checkxAOD.fileNames
fileNames
Definition: Tools/PyUtils/bin/checkxAOD.py:79
find
std::string find(const std::string &s)
return a remapped string
Definition: hcg.cxx:135
xAOD::RDataSource::isVerboseOutput
Bool_t isVerboseOutput() const
Check whether verbose output is set up to be printed.
Definition: RDataSource.cxx:391
SG::normalizedTypeinfoName
std::string normalizedTypeinfoName(const std::type_info &info)
Convert a type_info to a normalized string representation (matching the names used in the root dictio...
Definition: normalizedTypeinfoName.cxx:120
xAOD::RDataSource::GetEntryRanges
virtual EntryRanges_t GetEntryRanges() override final
Get the entry ranges in the input file(s)
Definition: RDataSource.cxx:352
xAOD::RDataSource::m_fileNames
std::vector< std::string > m_fileNames
Files to read.
Definition: RDataSource.h:111
tree
TChain * tree
Definition: tile_monitor.h:30
python.AthDsoLogger.out
out
Definition: AthDsoLogger.py:71
DeMoUpdate.column
dictionary column
Definition: DeMoUpdate.py:1110
athena.value
value
Definition: athena.py:124
xAOD
ICaloAffectedTool is abstract interface for tools checking if 4 mom is in calo affected region.
Definition: ICaloAffectedTool.h:24
XAOD_MESSAGE
#define XAOD_MESSAGE(MESSAGE)
Simple macro for printing error/verbose messages.
Definition: Control/xAODRootAccess/xAODRootAccess/tools/Message.h:19
vec
std::vector< size_t > vec
Definition: CombinationsGeneratorTest.cxx:9
xAOD::RDataSource::m_chains
std::vector< std::unique_ptr< TChain > > m_chains
Chains used in the file I/O.
Definition: RDataSource.h:141
xAOD::RDataSource::m_classNameMap
std::unordered_map< std::string, std::string > m_classNameMap
The object name -> class name map.
Definition: RDataSource.h:125
xAOD::RDataSource::m_events
std::vector< std::unique_ptr< RDataSourceEvent > > m_events
Event objects performing the file I/O.
Definition: RDataSource.h:143
xAOD::RDataSource::SetEntry
virtual bool SetEntry(unsigned int slot, ULong64_t entry) override final
Set which entry a given slot/thread should be processing.
Definition: RDataSource.cxx:363
xAOD::RDataSource::m_entryRanges
EntryRanges_t m_entryRanges
Optimal entry ranges to split the processing into.
Definition: RDataSource.h:133
xAOD::RDataSource::m_verboseOutput
Bool_t m_verboseOutput
Whether verbose output should be printed or not.
Definition: RDataSource.h:115
xAOD::RDataSource
Data source for xAOD input files.
Definition: RDataSource.h:35
POOL::TEvent::readFrom
StatusCode readFrom(TFile *file)
Definition: PhysicsAnalysis/POOLRootAccess/src/TEvent.cxx:133
xAOD::RDataSource::m_columnNames
std::vector< std::string > m_columnNames
Names of the columns/objects on the input.
Definition: RDataSource.h:123
xAOD::RDataSource::GetColumnReadersImpl
virtual Record_t GetColumnReadersImpl(std::string_view column, const std::type_info &typeInfo) override final
Return the type-erased vector of pointers to pointers to column values.
Definition: RDataSource.cxx:397
event
POOL::TEvent event(POOL::TEvent::kClassAccess)
POOL::TEvent::getEntry
int getEntry(long entry)
Definition: PhysicsAnalysis/POOLRootAccess/src/TEvent.cxx:185
lumiFormat.i
int i
Definition: lumiFormat.py:85
POOL::TEvent::getEntries
long getEntries()
Definition: PhysicsAnalysis/POOLRootAccess/src/TEvent.cxx:124
vector
Definition: MultiHisto.h:13
xAOD::RDataSource::Finalize
virtual void Finalize() override final
Finalize the data source, after the event loop.
Definition: RDataSource.cxx:308
Message.h
TEvent.h
CalibDbCompareRT.dummy
dummy
Definition: CalibDbCompareRT.py:60
file
TFile * file
Definition: tile_monitor.h:29
dumpFileToPlots.treeName
string treeName
Definition: dumpFileToPlots.py:20
xAOD::RDataSource::Initialize
virtual void Initialize() override final
Initialize the data source, before the start of the event loop.
Definition: RDataSource.cxx:192
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
xAOD::RDataSource::HasColumn
virtual bool HasColumn(std::string_view name) const override final
Check if the dataset has a certain column/object.
Definition: RDataSource.cxx:323
xAOD::RDataSource::EntryRanges_t
std::vector< std::pair< ULong64_t, ULong64_t > > EntryRanges_t
Type describing the entry ranges of the input file(s)
Definition: RDataSource.h:48
xAOD::RDataSource::readInputMetadata
void readInputMetadata()
Fill the metadata variables.
Definition: RDataSource.cxx:420
xAOD::RDataSource::setVerboseOutput
void setVerboseOutput(Bool_t value=kTRUE)
Set whether verbose output should be printed (for debugging)
Definition: RDataSource.cxx:385
PRINT_VERBOSE
#define PRINT_VERBOSE(MSG)
Helper macro for printing verbose messages for debugging.
Definition: RDataSource.cxx:96
xAOD::RDataSource::m_treeName
std::string m_treeName
Name of the event tree in the input files.
Definition: RDataSource.h:113
xAOD::RDataSource::InitSlot
virtual void InitSlot(unsigned int slot, ULong64_t firstEntry) override final
Initialize one of the slots/threads.
Definition: RDataSource.cxx:251
xAOD::RDataSource::~RDataSource
~RDataSource()
Destructor.
Definition: RDataSource.cxx:120
xAOD::RDataSourceEvent
Extension to xAOD::TEvent, used by xAOD::RDataSource.
Definition: RDataSourceEvent.h:31
DeMoScan.first
bool first
Definition: DeMoScan.py:536
entries
double entries
Definition: listroot.cxx:49
xAOD::RDataSource::GetColumnNames
virtual const std::vector< std::string > & GetColumnNames() const override final
Get the column/object names for the input file(s)
Definition: RDataSource.cxx:318
xAOD::RDataSource::GetTypeName
virtual std::string GetTypeName(std::string_view column) const override final
Get the type name of a given column/object.
Definition: RDataSource.cxx:329
LArCellNtuple.ifile
string ifile
Definition: LArCellNtuple.py:133
jobOptions.fileName
fileName
Definition: jobOptions.SuperChic_ALP2.py:39
RDataSource.h
xAOD::RDataSource::SetNSlots
virtual void SetNSlots(unsigned int slots) override final
Set the number of threads/slots that the data source should use.
Definition: RDataSource.cxx:127
xAOD::operator<<
std::ostream & operator<<(std::ostream &out, const std::pair< FIRST, SECOND > &pair)
Helper print operator.
Definition: RDataSource.cxx:53