ATLAS Offline Software
RDataSource.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Local include(s).
6 #include "RDataSource.h"
7 
8 // xAOD include(s).
11 
12 // ROOT include(s).
13 #include <TChain.h>
14 #include <TFile.h>
15 #include <TROOT.h>
16 #include <TError.h>
17 
18 // System include(s).
19 #include <algorithm>
20 #include <memory>
21 #include <stdexcept>
22 
23 namespace {
24 
26  std::unique_ptr< TChain >
27  makeChain( const std::vector< std::string >& fileNames,
28  std::string_view treeName ) {
29 
30  // Create the chain object.
31  std::unique_ptr< TChain > chain;
32  {
33  R__LOCKGUARD( gROOTMutex );
34  chain = std::make_unique< TChain >( treeName.data() );
35  }
36 
37  // Set it up.
38  chain->ResetBit( kMustCleanup );
39  for( const std::string& fileName : fileNames ) {
40  chain->Add( fileName.c_str() );
41  }
42 
43  // Return the newly created chain.
44  return chain;
45  }
46 
47 } // private namespace
48 
namespace xAOD {

   /// Helper print operator for pairs
   ///
   /// Prints the pair as <code>[first, second]</code>.
   ///
   template< typename FIRST, typename SECOND >
   std::ostream& operator<< ( std::ostream& out,
                              const std::pair< FIRST, SECOND >& pair ) {

      out << "[" << pair.first << ", " << pair.second << "]";
      return out;
   }

   /// Helper print operator for vectors
   ///
   /// Prints the elements as <code>[a, b, c]</code>; an empty vector is
   /// printed as <code>[]</code>.
   ///
   template< typename T >
   std::ostream& operator<< ( std::ostream& out, const std::vector< T >& vec ) {

      out << "[";
      // The separator is empty for the first element only.
      const char* separator = "";
      for( const auto& element : vec ) {
         out << separator << element;
         separator = ", ";
      }
      out << "]";
      return out;
   }

   /// Helper print operator for unordered maps
   ///
   /// Prints the content as <code>{key : value, ...}</code>, in the
   /// iteration order of the map. The key and value types may differ.
   ///
   template< typename KEY, typename VALUE >
   std::ostream& operator<< ( std::ostream& out,
                              const std::unordered_map< KEY, VALUE >& umap ) {

      out << "{";
      const char* separator = "";
      for( const auto& upair : umap ) {
         out << separator << upair.first << " : " << upair.second;
         separator = ", ";
      }
      out << "}";
      return out;
   }

} // namespace xAOD
94 
/// Helper macro for printing verbose messages for debugging
///
/// The message is only streamed to <code>std::cout</code> when a variable
/// called <code>m_verboseOutput</code>, visible at the point of expansion,
/// evaluates to true. MSG may be a chain of <code>&lt;&lt;</code> operands,
/// which is why it is deliberately not parenthesised.
#define PRINT_VERBOSE(MSG)                                                  \
   do {                                                                     \
      if( m_verboseOutput ) {                                               \
         std::cout << "xAOD::RDataSource VERBOSE " << MSG << std::endl;     \
      }                                                                     \
   } while( false )
102 
103 namespace xAOD {
104 
105  RDataSource::RDataSource( std::string_view fileNameGlob,
106  std::string_view treeName )
107  : RDataSource( std::vector< std::string >( { fileNameGlob.data() } ),
108  treeName ) {
109 
110  }
111 
112  RDataSource::RDataSource( const std::vector< std::string >& fileNames,
113  std::string_view treeName )
114  : m_fileNames( fileNames ), m_treeName( treeName ),
115  m_verboseOutput( kFALSE ) {
116 
118  }
119 
121 
122  // I don't understand why, but ROOT really doesn't like it if the
123  // chains are not the first to be deleted from memory. :-/
124  m_chains.clear();
125  }
126 
127  void RDataSource::SetNSlots( unsigned int slots ) {
128 
129  // Some sanity checks.
130  if( slots == 0 ) {
131  ::Error( "xAOD::RDataSource::SetNSlots",
132  XAOD_MESSAGE( "Zero slots requested" ) );
133  throw std::invalid_argument( "Zero slots requested" );
134  }
135  if( m_events.size() != 0 ) {
136  ::Error( "xAOD::RDataSource::SetNSlots",
137  XAOD_MESSAGE( "Function called multiple times" ) );
138  throw std::runtime_error( "Function called multiple times" );
139  }
140 
141  // Reserve the correct number of elements.
142  m_chains.reserve( slots );
143  m_events.reserve( slots );
144  m_stores.reserve( slots );
145  PRINT_VERBOSE( "SetNSlots: Reserved " << slots
146  << " slots for the chains, events and stores" );
147 
148  // Create the event objects already at this point.
149  for( unsigned int i = 0; i < slots; ++i ) {
150 
151  // Set up the chain, event and store.
152  m_chains.push_back( ::makeChain( m_fileNames, m_treeName ) );
153  m_events.push_back( std::make_unique< RDataSourceEvent >() );
154  m_stores.push_back( std::make_unique< TStore >() );
155  TChain* chain = m_chains.back().get();
156  RDataSourceEvent* event = m_events.back().get();
157 
158  // Initialize the event object.
159  if( ! event->readFrom( chain ).isSuccess() ) {
160  ::Error( "xAOD::RDataSource::SetNSlots",
161  XAOD_MESSAGE( "Failed to set up xAOD::RDataSourceEvent "
162  "for slot %u" ), i );
163  throw std::runtime_error( "Failed to set up "
164  "xAOD::RDataSourceEvent" );
165  }
166 
167  // Load entry 0 for it. Notice that this is a waste of CPU and I/O
168  // on the surface. But it's not... This triggers the initialization of
169  // the files/trees used by these chains. Which happens much more
170  // quickly in a serial way in a single thread than in multiple threads
171  // at the same time. To be followed up with the ROOT developers...
172  if( event->getEntry( 0 ) < 0 ) {
173  ::Error( "xAOD::RDataSource::SetNSlots",
174  XAOD_MESSAGE( "Failed to load entry 0 for slot %u" ), i );
175  throw std::runtime_error( "Failed to load entry for slot" );
176  }
177  PRINT_VERBOSE( "SetNSlots: Initialized objects for slot " << i );
178  }
179 
180  // Return gracefully.
181  return;
182  }
183 
184 #if ROOT_VERSION_CODE < ROOT_VERSION(6,28,00)
185  void RDataSource::Initialise() {
186 #else
188 #endif
189 
190  // A sanity check.
191  if( m_entryRanges.size() != 0 ) {
192  ::Fatal( "xAOD::RDataSource::Initialize",
193  XAOD_MESSAGE( "Function called on an initialized object" ) );
194  }
195  PRINT_VERBOSE( "Initialize: Initializing the data source" );
196 
197  // Create a chain that will help determine the optimal entry ranges
198  // to process.
200  TObjArray* filesInChain = chain->GetListOfFiles();
201 
202  // Loop over the input files of the chain.
203  Long64_t fileOffset = 0;
204  for( Int_t ifile = 0; ifile < filesInChain->GetEntries(); ++ifile ) {
205 
206  // Open the file directly.
207  const char* fileName = filesInChain->At( ifile )->GetTitle();
208  auto file = std::unique_ptr< TFile >( TFile::Open( fileName,
209  "READ" ) );
210  if( ( ! file ) || file->IsZombie() ) {
211  ::Error( "xAOD::RDataSource::Initialize",
212  XAOD_MESSAGE( "Failed to open file: %s" ), fileName );
213  throw std::runtime_error( "Failed to open file: " +
214  std::string( fileName ) );
215  }
216 
217  // Access the event tree inside of it.
218  TTree* tree =
219  dynamic_cast< TTree* >( file->Get( m_treeName.c_str() ) );
220  if( ! tree ) {
221  // A file with no event tree is not necessarily a problem. It could
222  // just be a file that has no events left in it after all
223  // selections.
224  continue;
225  }
226 
227  // Extract the ideal entry ranges from the file.
228  const Long64_t entries = tree->GetEntries();
229  TTree::TClusterIterator clusterIter( tree->GetClusterIterator( 0 ) );
230  Long64_t clusterStart = 0;
231  while( ( clusterStart = clusterIter() ) < entries ) {
232  m_entryRanges.emplace_back( fileOffset + clusterStart,
233  fileOffset +
234  clusterIter.GetNextEntry() );
235  }
236 
237  // Increment the file offset value.
238  fileOffset += entries;
239  }
240  PRINT_VERBOSE( "Initialize: Created entry ranges: " << m_entryRanges );
241 
242  // Return gracefully.
243  return;
244  }
245 
246  void RDataSource::InitSlot( unsigned int slot, ULong64_t firstEntry ) {
247 
248  // A sanity check.
249  if( m_events.size() <= slot ) {
250  ::Error( "xAOD::RDataSource::InitSlot",
251  XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
252  throw std::runtime_error( "Invalid slot received" );
253  }
254 
255  // Load the first entry for it.
256  if( m_events[ slot ]->getEntry( firstEntry ) < 0 ) {
257  ::Error( "xAOD::RDataSource::InitSlot",
258  XAOD_MESSAGE( "Failed to load entry %lld for slot %u" ),
259  firstEntry, slot );
260  throw std::runtime_error( "Failed to load entry for slot" );
261  }
262  PRINT_VERBOSE( "InitSlot: Retrieved entry " << firstEntry << " for slot "
263  << slot );
264 
265  // Activate and clear the store.
266  m_stores[ slot ]->setActive();
267  m_stores[ slot ]->clear();
268  PRINT_VERBOSE( "InitSlot: Activated and cleared transient store for slot "
269  << slot );
270 
271  // Return gracefully.
272  return;
273  }
274 
275 #if ROOT_VERSION_CODE < ROOT_VERSION(6,28,00)
276  void RDataSource::FinaliseSlot( unsigned int slot ) {
277 
278  // Simply print what's happening.
279  PRINT_VERBOSE( "FinaliseSlot: Called for slot " << slot );
280 
281  // Return gracefully.
282  return;
283  }
284 
285  void RDataSource::Finalise() {
286 
287  // Simply print what's happening.
288  PRINT_VERBOSE( "Finalise: Function called" );
289 
290  // Return gracefully.
291  return;
292  }
293 #else
294  void RDataSource::FinalizeSlot( unsigned int slot ) {
295 
296  // Simply print what's happening.
297  PRINT_VERBOSE( "FinalizeSlot: Called for slot " << slot );
298 
299  // Return gracefully.
300  return;
301  }
302 
304 
305  // Simply print what's happening.
306  PRINT_VERBOSE( "Finalize: Function called" );
307 
308  // Return gracefully.
309  return;
310  }
311 #endif
312 
313  const std::vector< std::string >& RDataSource::GetColumnNames() const {
314 
315  return m_columnNames;
316  }
317 
318  bool RDataSource::HasColumn( std::string_view name ) const {
319 
320  return std::find( m_columnNames.begin(), m_columnNames.end(),
321  name ) != m_columnNames.end();
322  }
323 
324  std::string RDataSource::GetTypeName( std::string_view column ) const {
325 
326  // Make sure that the column/object is known.
327  if( ! HasColumn( column ) ) {
328  ::Error( "xAOD::RDataSource::GetTypeName",
329  XAOD_MESSAGE( "Column/object \"%s\" not available" ),
330  column.data() );
331  throw std::runtime_error( "Column/object \"" + std::string( column ) +
332  "\" not available" );
333  }
334 
335  // Get the type.
336  auto itr = m_classNameMap.find( column.data() );
337  if( itr == m_classNameMap.end() ) {
338  // Note that the fatal message will abort the entire job in all cases.
339  ::Fatal( "xAOD::RDataSource::GetTypeName",
340  XAOD_MESSAGE( "Internal logic error found" ) );
341  }
342  PRINT_VERBOSE( "GetTypeName: Type name for column \"" << column
343  << "\" is: " << itr->second );
344  return itr->second;
345  }
346 
348 
349  // When ROOT asks for the entry ranges, we have to tell it which ones
350  // have not been processed yet. Since we process all entries right away
351  // (SetEntry(...) does not have logic for not processing a requested
352  // entry), the logic here is to empty out the m_entryRanges variable on
353  // this call. So that on the next call an empty range would be returned.
354  const EntryRanges_t dummy( std::move( m_entryRanges ) );
355  return dummy;
356  }
357 
358  bool RDataSource::SetEntry( unsigned int slot, ULong64_t entry ) {
359 
360  // A sanity check.
361  if( m_events.size() <= slot ) {
362  ::Error( "xAOD::RDataSource::SetEntry",
363  XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
364  throw std::runtime_error( "Invalid slot received" );
365  }
366  PRINT_VERBOSE( "SetEntry: Called for slot " << slot << " and entry "
367  << entry );
368 
369  // Switch to the requested entry.
370  m_events[ slot ]->updateObjectsForEntry( entry );
371 
372  // Activate and clear the store.
373  m_stores[ slot ]->setActive();
374  m_stores[ slot ]->clear();
375 
376  // The entry is always processed.
377  return true;
378  }
379 
381 
383  return;
384  }
385 
387 
388  return m_verboseOutput;
389  }
390 
391  RDataSource::Record_t
393  const std::type_info& typeInfo ) {
394 
395  // Make sure that the column/object is known.
396  if( ! HasColumn( column ) ) {
397  ::Error( "xAOD::RDataSource::GetColumnReadersImpl",
398  XAOD_MESSAGE( "Column/object \"%s\" not available" ),
399  column.data() );
400  throw std::runtime_error( "Column/object \"" + std::string( column ) +
401  "\" not available" );
402  }
403  PRINT_VERBOSE( "GetColumnReadersImpl: Creating column readers for \""
404  << column << "/" << SG::normalizedTypeinfoName( typeInfo )
405  << "\"" );
406 
407  // Create the column reader pointers.
408  Record_t result( m_events.size() );
409  for( size_t i = 0; i < m_events.size(); ++i ) {
410  result[ i ] = m_events[ i ]->columnReader( column, typeInfo );
411  }
412  return result;
413  }
414 
416 
417  // Create a temporary event object.
420  if( ! event.readFrom( chain.get() ).isSuccess() ) {
421  ::Error( "xAOD::RDataSource::readInputMetadata",
422  XAOD_MESSAGE( "Failed to connect to the input chain" ) );
423  throw std::runtime_error( "Failed to connect to the input chain" );
424  }
425 
426  // Load the first event of the input, if one is available.
427  if( event.getEntries() > 0 ) {
428  if( event.getEntry( 0 ) < 0 ) {
429  ::Error( "xAOD::RDataSource::readInputMetadata",
430  "Couldn't load the first event of the input" );
431  throw std::runtime_error( "Couldn't load the first event of the "
432  "input" );
433  }
434  }
435 
436  // Fill the column and type name variables.
437  m_columnNames.clear(); m_classNameMap.clear();
438  auto names = event.columnAndTypeNames();
439  m_columnNames.reserve( names.size() );
440  m_classNameMap.reserve( names.size() );
441  for( const auto& pair : names ) {
442  m_columnNames.push_back( pair.first );
443  m_classNameMap[ pair.first ] = pair.second;
444  }
445  PRINT_VERBOSE( "readInputMetadata: m_columnNames = " << m_columnNames );
446  PRINT_VERBOSE( "readInputMetadata: m_classNameMap = " << m_classNameMap );
447 
448  // ROOT memory management is weird... We must delete the chain first,
449  // before the TEvent object on top of it would be deleted...
450  chain.reset();
451 
452  // Return gracefully.
453  return;
454  }
455 
456 } // namespace xAOD
TestUtils.makeChain
def makeChain(flags, name, L1Thresholds, ChainSteps, Streams="physics:Main", Groups=["RATE:TestRateGroup", "BW:TestBW"])
Definition: TestUtils.py:34
xAOD::RDataSource::FinalizeSlot
virtual void FinalizeSlot(unsigned int slot) override final
Close the input file reading in one of the slots/threads.
Definition: RDataSource.cxx:294
TestSUSYToolsAlg.ifile
ifile
Definition: TestSUSYToolsAlg.py:92
xAOD::RDataSource::RDataSource
RDataSource(std::string_view fileNameGlob, std::string_view treeName="CollectionTree")
Constructor with the file name pattern.
Definition: RDataSource.cxx:105
xAOD::name
name
Definition: TriggerMenuJson_v1.cxx:29
xAOD::RDataSource::m_stores
std::vector< std::unique_ptr< TStore > > m_stores
In-memory whiteboards used during the event loop.
Definition: RDataSource.h:145
get_generator_info.result
result
Definition: get_generator_info.py:21
checkxAOD.fileNames
fileNames
Definition: Tools/PyUtils/bin/checkxAOD.py:73
find
std::string find(const std::string &s)
return a remapped string
Definition: hcg.cxx:135
xAOD::RDataSource::isVerboseOutput
Bool_t isVerboseOutput() const
Check whether verbose output is set up to be printed.
Definition: RDataSource.cxx:386
SG::normalizedTypeinfoName
std::string normalizedTypeinfoName(const std::type_info &info)
Convert a type_info to a normalized string representation (matching the names used in the root dictio...
Definition: normalizedTypeinfoName.cxx:120
xAOD::RDataSource::GetEntryRanges
virtual EntryRanges_t GetEntryRanges() override final
Get the entry ranges in the input file(s)
Definition: RDataSource.cxx:347
xAOD::RDataSource::m_fileNames
std::vector< std::string > m_fileNames
Files to read.
Definition: RDataSource.h:111
tree
TChain * tree
Definition: tile_monitor.h:30
DeMoUpdate.column
dictionary column
Definition: DeMoUpdate.py:1110
athena.value
value
Definition: athena.py:122
xAOD
ICaloAffectedTool is abstract interface for tools checking if 4 mom is in calo affected region.
Definition: ICaloAffectedTool.h:24
XAOD_MESSAGE
#define XAOD_MESSAGE(MESSAGE)
Simple macro for printing error/verbose messages.
Definition: Control/xAODRootAccess/xAODRootAccess/tools/Message.h:19
vec
std::vector< size_t > vec
Definition: CombinationsGeneratorTest.cxx:12
xAOD::RDataSource::m_chains
std::vector< std::unique_ptr< TChain > > m_chains
Chains used in the file I/O.
Definition: RDataSource.h:141
xAOD::RDataSource::m_classNameMap
std::unordered_map< std::string, std::string > m_classNameMap
The object name -> class name map.
Definition: RDataSource.h:125
beamspotPlotBcids.chain
chain
Definition: beamspotPlotBcids.py:442
xAOD::RDataSource::m_events
std::vector< std::unique_ptr< RDataSourceEvent > > m_events
Event objects performing the file I/O.
Definition: RDataSource.h:143
xAOD::RDataSource::SetEntry
virtual bool SetEntry(unsigned int slot, ULong64_t entry) override final
Set which entry a given slot/thread should be processing.
Definition: RDataSource.cxx:358
xAOD::RDataSource::m_entryRanges
EntryRanges_t m_entryRanges
Optimal entry ranges to split the processing into.
Definition: RDataSource.h:133
xAOD::RDataSource::m_verboseOutput
Bool_t m_verboseOutput
Whether verbose output should be printed or not.
Definition: RDataSource.h:115
xAOD::RDataSource
Data source for xAOD input files.
Definition: RDataSource.h:35
POOL::TEvent::readFrom
StatusCode readFrom(TFile *file)
Definition: PhysicsAnalysis/POOLRootAccess/src/TEvent.cxx:132
FortranAlgorithmOptions.fileName
fileName
Definition: FortranAlgorithmOptions.py:13
xAOD::RDataSource::m_columnNames
std::vector< std::string > m_columnNames
Names of the columns/objects on the input.
Definition: RDataSource.h:123
xAOD::RDataSource::GetColumnReadersImpl
virtual Record_t GetColumnReadersImpl(std::string_view column, const std::type_info &typeInfo) override final
Return the type-erased vector of pointers to pointers to column values.
Definition: RDataSource.cxx:392
event
POOL::TEvent event(POOL::TEvent::kClassAccess)
POOL::TEvent::getEntry
int getEntry(long entry)
Definition: PhysicsAnalysis/POOLRootAccess/src/TEvent.cxx:184
lumiFormat.i
int i
Definition: lumiFormat.py:92
POOL::TEvent::getEntries
long getEntries()
Definition: PhysicsAnalysis/POOLRootAccess/src/TEvent.cxx:123
vector
Definition: MultiHisto.h:13
xAOD::RDataSource::Finalize
virtual void Finalize() override final
Finalize the data source, after the event loop.
Definition: RDataSource.cxx:303
Message.h
TEvent.h
file
TFile * file
Definition: tile_monitor.h:29
dumpFileToPlots.treeName
string treeName
Definition: dumpFileToPlots.py:20
python.xAODType.dummy
dummy
Definition: xAODType.py:4
xAOD::RDataSource::Initialize
virtual void Initialize() override final
Initialize the data source, before the start of the event loop.
Definition: RDataSource.cxx:187
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
xAOD::RDataSource::HasColumn
virtual bool HasColumn(std::string_view name) const override final
Check if the dataset has a certain column/object.
Definition: RDataSource.cxx:318
xAOD::RDataSource::EntryRanges_t
std::vector< std::pair< ULong64_t, ULong64_t > > EntryRanges_t
Type describing the entry ranges of the input file(s)
Definition: RDataSource.h:48
test_pythinning.out
out
Definition: test_pythinning.py:94
xAOD::RDataSource::readInputMetadata
void readInputMetadata()
Fill the metadata variables.
Definition: RDataSource.cxx:415
xAOD::RDataSource::setVerboseOutput
void setVerboseOutput(Bool_t value=kTRUE)
Set whether verbose output should be printed (for debugging)
Definition: RDataSource.cxx:380
PRINT_VERBOSE
#define PRINT_VERBOSE(MSG)
Helper macro for printing verbose messages for debugging.
Definition: RDataSource.cxx:96
xAOD::RDataSource::m_treeName
std::string m_treeName
Name of the event tree in the input files.
Definition: RDataSource.h:113
xAOD::RDataSource::InitSlot
virtual void InitSlot(unsigned int slot, ULong64_t firstEntry) override final
Initialize one of the slots/threads.
Definition: RDataSource.cxx:246
xAOD::RDataSource::~RDataSource
~RDataSource()
Destructor.
Definition: RDataSource.cxx:120
xAOD::RDataSourceEvent
Extension to xAOD::TEvent, used by xAOD::RDataSource.
Definition: RDataSourceEvent.h:31
DeMoScan.first
bool first
Definition: DeMoScan.py:534
entries
double entries
Definition: listroot.cxx:49
xAOD::RDataSource::GetColumnNames
virtual const std::vector< std::string > & GetColumnNames() const override final
Get the column/object names for the input file(s)
Definition: RDataSource.cxx:313
xAOD::RDataSource::GetTypeName
virtual std::string GetTypeName(std::string_view column) const override final
Get the type name of a given column/object.
Definition: RDataSource.cxx:324
RDataSource.h
xAOD::RDataSource::SetNSlots
virtual void SetNSlots(unsigned int slots) override final
Set the number of threads/slots that the data source should use.
Definition: RDataSource.cxx:127
xAOD::operator<<
std::ostream & operator<<(std::ostream &out, const std::pair< FIRST, SECOND > &pair)
Helper print operator.
Definition: RDataSource.cxx:53