ATLAS Offline Software
Loading...
Searching...
No Matches
RDataSource.cxx
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3//
4
5// Local include(s).
6#include "RDataSource.h"
7
8// xAOD include(s).
11
12// ROOT include(s).
13#include <TChain.h>
14#include <TFile.h>
15#include <TROOT.h>
16#include <TError.h>
17
18// System include(s).
19#include <algorithm>
20#include <memory>
21#include <stdexcept>
22
23namespace {
24
26 std::unique_ptr< TChain >
27 makeChain( const std::vector< std::string >& fileNames,
28 std::string_view treeName ) {
29
30 // Create the chain object.
31 std::unique_ptr< TChain > chain;
32 {
33 R__LOCKGUARD( gROOTMutex );
34 chain = std::make_unique< TChain >( treeName.data() );
35 }
36
37 // Set it up.
38 chain->ResetBit( kMustCleanup );
39 for( const std::string& fileName : fileNames ) {
40 chain->Add( fileName.c_str() );
41 }
42
43 // Return the newly created chain.
44 return chain;
45 }
46
47} // private namespace
48
49namespace xAOD {
50
/// Helper print operator, writing a pair as "[first, second]".
///
/// @param out  The stream to write to
/// @param pair The pair to print
/// @return The stream that was written to
template< typename FIRST, typename SECOND >
std::ostream& operator<< ( std::ostream& out,
                           const std::pair< FIRST, SECOND >& pair ) {

   const auto& [ head, tail ] = pair;
   out << "[" << head << ", " << tail << "]";
   return out;
}
59
/// Helper print operator, writing a vector as "[a, b, c]".
///
/// An empty vector is printed as "[]".
///
/// @param out The stream to write to
/// @param vec The vector to print
/// @return The stream that was written to
template< typename T >
std::ostream& operator<< ( std::ostream& out, const std::vector< T >& vec ) {

   out << "[";
   bool needSeparator = false;
   for( const T& element : vec ) {
      // The separator goes in front of every element but the first one.
      if( needSeparator ) {
         out << ", ";
      }
      out << element;
      needSeparator = true;
   }
   out << "]";
   return out;
}
74
/// Helper print operator, writing a map as "{key : value, key : value}".
///
/// The key and value types may differ. Elements are printed in the
/// iteration order of the map; an empty map is printed as "{}".
///
/// @param out  The stream to write to
/// @param umap The map to print
/// @return The stream that was written to
template< typename KEY, typename VALUE >
std::ostream& operator<< ( std::ostream& out,
                           const std::unordered_map< KEY, VALUE >& umap ) {

   out << "{";
   bool first = true;
   for( const auto& [ key, value ] : umap ) {
      if( ! first ) {
         out << ", ";
      }
      out << key << " : " << value;
      first = false;
   }
   out << "}";
   return out;
}
92
93} // namespace xAOD
94
/// Helper macro for printing verbose messages for debugging.
///
/// Expands to a single statement that streams MSG to std::cout, followed
/// by std::endl, but only if the m_verboseOutput variable visible at the
/// point of use evaluates to true.
#define PRINT_VERBOSE(MSG)                                              \
   do {                                                                 \
      if( m_verboseOutput ) {                                           \
         std::cout << "xAOD::RDataSource VERBOSE " << MSG << std::endl; \
      }                                                                 \
   } while( false )
102
103namespace xAOD {
104
105 RDataSource::RDataSource( std::string_view fileNameGlob,
106 std::string_view treeName )
107 : RDataSource( std::vector< std::string >( { fileNameGlob.data() } ),
108 treeName ) {
109
110 }
111
// Constructor with a list of file names. Initializes m_fileNames and
// m_treeName from the arguments.
// NOTE(review): this listing jumps from source line 115 to 117 inside the
// body, so one line of the constructor is not visible here -- presumably the
// call that fills the column metadata; confirm against the repository.
112 RDataSource::RDataSource( const std::vector< std::string >& fileNames,
113 std::string_view treeName )
114 : m_fileNames( fileNames ), m_treeName( treeName ) {
115
117 }
118
120
121 // I don't understand why, but ROOT really doesn't like it if the
122 // chains are not the first to be deleted from memory. :-/
123 m_chains.clear();
124 }
125
126 void RDataSource::SetNSlots( unsigned int slots ) {
127
128 // Some sanity checks.
129 if( slots == 0 ) {
130 ::Error( "xAOD::RDataSource::SetNSlots",
131 XAOD_MESSAGE( "Zero slots requested" ) );
132 throw std::invalid_argument( "Zero slots requested" );
133 }
134 if( m_events.size() != 0 ) {
135 ::Error( "xAOD::RDataSource::SetNSlots",
136 XAOD_MESSAGE( "Function called multiple times" ) );
137 throw std::runtime_error( "Function called multiple times" );
138 }
139
140 // Reserve the correct number of elements.
141 m_chains.reserve( slots );
142 m_events.reserve( slots );
143 m_stores.reserve( slots );
144 PRINT_VERBOSE( "SetNSlots: Reserved " << slots
145 << " slots for the chains, events and stores" );
146
147 // Create the event objects already at this point.
148 for( unsigned int i = 0; i < slots; ++i ) {
149
150 // Set up the chain, event and store.
151 m_chains.push_back( ::makeChain( m_fileNames, m_treeName ) );
152 m_events.push_back(
153 std::make_unique< RDataSourceEvent >( m_auxmode ) );
154 m_stores.push_back( std::make_unique< TStore >() );
155 TChain* chain = m_chains.back().get();
156 RDataSourceEvent* event = m_events.back().get();
157
158 // Initialize the event object.
159 if( ! event->readFrom( chain ).isSuccess() ) {
160 ::Error( "xAOD::RDataSource::SetNSlots",
161 XAOD_MESSAGE( "Failed to set up xAOD::RDataSourceEvent "
162 "for slot %u" ), i );
163 throw std::runtime_error( "Failed to set up "
164 "xAOD::RDataSourceEvent" );
165 }
166
167 // Load entry 0 for it. Notice that this is a waste of CPU and I/O
168 // on the surface. But it's not... This triggers the initialization of
169 // the files/trees used by these chains. Which happens much more
170 // quickly in a serial way in a single thread than in multiple threads
171 // at the same time. To be followed up with the ROOT developers...
172 if( event->getEntry( 0 ) < 0 ) {
173 ::Error( "xAOD::RDataSource::SetNSlots",
174 XAOD_MESSAGE( "Failed to load entry 0 for slot %u" ), i );
175 throw std::runtime_error( "Failed to load entry for slot" );
176 }
177 PRINT_VERBOSE( "SetNSlots: Initialized objects for slot " << i );
178 }
179
180#if ROOT_VERSION_CODE >= ROOT_VERSION(6,35,99)
181 // Pass on to the base class.
182 ROOT::RDF::RDataSource::SetNSlots( slots );
183#endif
184
185 // Return gracefully.
186 return;
187 }
188
190
191 // A sanity check.
192 if( m_entryRanges.size() != 0 ) {
193 ::Fatal( "xAOD::RDataSource::Initialize",
194 XAOD_MESSAGE( "Function called on an initialized object" ) );
195 }
196 PRINT_VERBOSE( "Initialize: Initializing the data source" );
197
198 // Create a chain that will help determine the optimal entry ranges
199 // to process.
200 auto chain = ::makeChain( m_fileNames, m_treeName );
201 TObjArray* filesInChain = chain->GetListOfFiles();
202
203 // Loop over the input files of the chain.
204 Long64_t fileOffset = 0;
205 for( Int_t ifile = 0; ifile < filesInChain->GetEntries(); ++ifile ) {
206
207 // Open the file directly.
208 const char* fileName = filesInChain->At( ifile )->GetTitle();
209 auto file = std::unique_ptr< TFile >( TFile::Open( fileName,
210 "READ" ) );
211 if( ( ! file ) || file->IsZombie() ) {
212 ::Error( "xAOD::RDataSource::Initialize",
213 XAOD_MESSAGE( "Failed to open file: %s" ), fileName );
214 throw std::runtime_error( "Failed to open file: " +
215 std::string( fileName ) );
216 }
217
218 // Access the event tree inside of it.
219 TTree* tree =
220 dynamic_cast< TTree* >( file->Get( m_treeName.c_str() ) );
221 if( ! tree ) {
222 // A file with no event tree is not necessarily a problem. It could
223 // just be a file that has no events left in it after all
224 // selections.
225 continue;
226 }
227
228 // Extract the ideal entry ranges from the file.
229 const Long64_t entries = tree->GetEntries();
230 TTree::TClusterIterator clusterIter( tree->GetClusterIterator( 0 ) );
231 Long64_t clusterStart = 0;
232 while( ( clusterStart = clusterIter() ) < entries ) {
233 m_entryRanges.emplace_back( fileOffset + clusterStart,
234 fileOffset +
235 clusterIter.GetNextEntry() );
236 }
237
238 // Increment the file offset value.
239 fileOffset += entries;
240 }
241 PRINT_VERBOSE( "Initialize: Created entry ranges: " << m_entryRanges );
242
243 // Return gracefully.
244 return;
245 }
246
247 void RDataSource::InitSlot( unsigned int slot, ULong64_t firstEntry ) {
248
249 // A sanity check.
250 if( m_events.size() <= slot ) {
251 ::Error( "xAOD::RDataSource::InitSlot",
252 XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
253 throw std::runtime_error( "Invalid slot received" );
254 }
255
256 // Load the first entry for it.
257 if( m_events[ slot ]->getEntry( firstEntry ) < 0 ) {
258 ::Error( "xAOD::RDataSource::InitSlot",
259 XAOD_MESSAGE( "Failed to load entry %lld for slot %u" ),
260 firstEntry, slot );
261 throw std::runtime_error( "Failed to load entry for slot" );
262 }
263 PRINT_VERBOSE( "InitSlot: Retrieved entry " << firstEntry << " for slot "
264 << slot );
265
266 // Activate and clear the store.
267 m_stores[ slot ]->setActive();
268 m_stores[ slot ]->clear();
269 PRINT_VERBOSE( "InitSlot: Activated and cleared transient store for slot "
270 << slot );
271
272 // Return gracefully.
273 return;
274 }
275
276 void RDataSource::FinalizeSlot( unsigned int slot ) {
277
278 // Simply print what's happening.
279 PRINT_VERBOSE( "FinalizeSlot: Called for slot " << slot );
280
281 // Return gracefully.
282 return;
283 }
284
286
287 // Simply print what's happening.
288 PRINT_VERBOSE( "Finalize: Function called" );
289
290 // Return gracefully.
291 return;
292 }
293
294 const std::vector< std::string >& RDataSource::GetColumnNames() const {
295
296 return m_columnNames;
297 }
298
299 bool RDataSource::HasColumn( std::string_view name ) const {
300
301 return std::find( m_columnNames.begin(), m_columnNames.end(),
302 name ) != m_columnNames.end();
303 }
304
305 std::string RDataSource::GetTypeName( std::string_view column ) const {
306
307 // Make sure that the column/object is known.
308 if( ! HasColumn( column ) ) {
309 ::Error( "xAOD::RDataSource::GetTypeName",
310 XAOD_MESSAGE( "Column/object \"%s\" not available" ),
311 column.data() );
312 throw std::runtime_error( "Column/object \"" + std::string( column ) +
313 "\" not available" );
314 }
315
316 // Get the type.
317 auto itr = m_classNameMap.find( column.data() );
318 if( itr == m_classNameMap.end() ) {
319 // Note that the fatal message will abort the entire job in all cases.
320 ::Fatal( "xAOD::RDataSource::GetTypeName",
321 XAOD_MESSAGE( "Internal logic error found" ) );
322 }
323 PRINT_VERBOSE( "GetTypeName: Type name for column \"" << column
324 << "\" is: " << itr->second );
325 return itr->second;
326 }
327
329
330 // When ROOT asks for the entry ranges, we have to tell it which ones
331 // have not been processed yet. Since we process all entries right away
332 // (SetEntry(...) does not have logic for not processing a requested
333 // entry), the logic here is to empty out the m_entryRanges variable on
334 // this call. So that on the next call an empty range would be returned.
335 const EntryRanges_t dummy( std::move( m_entryRanges ) );
336 return dummy;
337 }
338
339 bool RDataSource::SetEntry( unsigned int slot, ULong64_t entry ) {
340
341 // A sanity check.
342 if( m_events.size() <= slot ) {
343 ::Error( "xAOD::RDataSource::SetEntry",
344 XAOD_MESSAGE( "Invalid slot (%u) received" ), slot );
345 throw std::runtime_error( "Invalid slot received" );
346 }
347 PRINT_VERBOSE( "SetEntry: Called for slot " << slot << " and entry "
348 << entry );
349
350 // Switch to the requested entry.
351 m_events[ slot ]->updateObjectsForEntry( entry );
352
353 // Activate and clear the store.
354 m_stores[ slot ]->setActive();
355 m_stores[ slot ]->clear();
356
357 // The entry is always processed.
358 return true;
359 }
360
361 void RDataSource::setVerboseOutput( Bool_t value ) {
362
363 m_verboseOutput = value;
364 return;
365 }
366
368
369 return m_verboseOutput;
370 }
371
373
374 m_auxmode = mode;
375 return;
376 }
377
379
380 return m_auxmode;
381 }
382
383 RDataSource::Record_t
384 RDataSource::GetColumnReadersImpl( std::string_view column,
385 const std::type_info& typeInfo ) {
386
387 // Make sure that the column/object is known.
388 if( ! HasColumn( column ) ) {
389 ::Error( "xAOD::RDataSource::GetColumnReadersImpl",
390 XAOD_MESSAGE( "Column/object \"%s\" not available" ),
391 column.data() );
392 throw std::runtime_error( "Column/object \"" + std::string( column ) +
393 "\" not available" );
394 }
395 PRINT_VERBOSE( "GetColumnReadersImpl: Creating column readers for \""
396 << column << "/" << SG::normalizedTypeinfoName( typeInfo )
397 << "\"" );
398
399 // Create the comlumn reader pointers.
400 Record_t result( m_events.size() );
401 for( size_t i = 0; i < m_events.size(); ++i ) {
402 result[ i ] = m_events[ i ]->columnReader( column, typeInfo );
403 }
404 return result;
405 }
406
408
409 // Create a temporary event object.
410 auto chain = ::makeChain( m_fileNames, m_treeName );
411 RDataSourceEvent event;
412 if( ! event.readFrom( chain.get() ).isSuccess() ) {
413 ::Error( "xAOD::RDataSource::readInputMetadata",
414 XAOD_MESSAGE( "Failed to connect to the input chain" ) );
415 throw std::runtime_error( "Failed to connect to the input chain" );
416 }
417
418 // Load the first event of the input, if one is available.
419 if( event.getEntries() > 0 ) {
420 if( event.getEntry( 0 ) < 0 ) {
421 ::Error( "xAOD::RDataSource::readInputMetadata",
422 "Couldn't load the first event of the input" );
423 throw std::runtime_error( "Couldn't load the first event of the "
424 "input" );
425 }
426 }
427
428 // Fill the column and type name variables.
429 m_columnNames.clear(); m_classNameMap.clear();
430 auto names = event.columnAndTypeNames();
431 m_columnNames.reserve( names.size() );
432 m_classNameMap.reserve( names.size() );
433 for( const auto& pair : names ) {
434 m_columnNames.push_back( pair.first );
435 m_classNameMap[ pair.first ] = pair.second;
436 }
437 PRINT_VERBOSE( "readInputMetadata: m_columnNames = " << m_columnNames );
438 PRINT_VERBOSE( "readInputMetadata: m_classNameMap = " << m_classNameMap );
439
440 // ROOT memory management is weird... We must delete the chain first,
441 // before the TEvent object on top of it would be deleted...
442 chain.reset();
443
444 // Return gracefully.
445 return;
446 }
447
448} // namespace xAOD
std::vector< size_t > vec
#define XAOD_MESSAGE(MESSAGE)
Simple macro for printing error/verbose messages.
#define PRINT_VERBOSE(MSG)
Helper macro for printing verbose messages for debugging.
STL class.
STL class.
Extension to xAOD::TEvent, used by xAOD::RDataSource.
std::vector< std::string > m_fileNames
Files to read.
RDataSource(std::string_view fileNameGlob, std::string_view treeName="CollectionTree")
Constructor with the file name pattern.
std::vector< std::pair< ULong64_t, ULong64_t > > EntryRanges_t
Type describing the entry ranges of the input file(s)
Definition RDataSource.h:48
TEvent::EAuxMode m_auxmode
Auxiliary access mode.
~RDataSource()
Destructor.
void readInputMetadata()
Fill the metadata variables.
Bool_t m_verboseOutput
Whether verbose output should be printed or not.
virtual void InitSlot(unsigned int slot, ULong64_t firstEntry) override final
Initialize one of the slots/threads.
void setVerboseOutput(Bool_t value=kTRUE)
Set whether verbose output should be printed (for debugging)
virtual std::string GetTypeName(std::string_view column) const override final
Get the type name of a given column/object.
void setAuxMode(TEvent::EAuxMode mode)
Set the auxiliary access mode.
virtual Record_t GetColumnReadersImpl(std::string_view column, const std::type_info &typeInfo) override final
Return the type-erased vector of pointers to pointers to column values.
std::vector< std::unique_ptr< TStore > > m_stores
In-memory whiteboards used during the event loop.
virtual void FinalizeSlot(unsigned int slot) override final
Close the input file reading in one of the slots/threads.
virtual void Finalize() override final
Finalize the data source, after the event loop.
std::unordered_map< std::string, std::string > m_classNameMap
The object name -> class name map.
virtual bool SetEntry(unsigned int slot, ULong64_t entry) override final
Set which entry a give slot/thread should be processing.
virtual const std::vector< std::string > & GetColumnNames() const override final
Get the column/object names for the input file(s)
std::vector< std::unique_ptr< RDataSourceEvent > > m_events
Event objects performing the file I/O.
TEvent::EAuxMode auxMode() const
Get the auxiliary access mode.
std::vector< std::string > m_columnNames
Names of the columns/objects on the input.
virtual void SetNSlots(unsigned int slots) override final
Set the number of threads/slots that the data source should use.
EntryRanges_t m_entryRanges
Optimal entry ranges to split the processing into.
std::string m_treeName
Name of the event tree in the input files.
virtual EntryRanges_t GetEntryRanges() override final
Get the entry ranges in the input file(s)
std::vector< std::unique_ptr< TChain > > m_chains
Chains used in the file I/O.
virtual void Initialize() override final
Initialize the data source, before the start of the event loop.
virtual bool HasColumn(std::string_view name) const override final
Check if the dataset has a certain column/object.
Bool_t isVerboseOutput() const
Check whether verbose output is set up to be printed.
double entries
Definition listroot.cxx:49
std::string normalizedTypeinfoName(const std::type_info &info)
Convert a type_info to a normalized string representation (matching the names used in the root dictio...
makeChain(flags, name, L1Thresholds, ChainSteps, Streams="physics:Main", Groups=["RATE:TestRateGroup", "BW:TestBW"])
Definition TestUtils.py:36
STL namespace.
ICaloAffectedTool is abstract interface for tools checking if 4 mom is in calo affected region.
static const SG::AuxElement::Accessor< std::vector< std::string > > names("thrNames")
Accessor for the names of the passed thresholds.
std::ostream & operator<<(std::ostream &out, const std::pair< FIRST, SECOND > &pair)
Helper print operator.
TChain * tree
TFile * file