ATLAS Offline Software
Loading...
Searching...
No Matches
trfValidateRootFile.py
Go to the documentation of this file.
1#!/usr/bin/env python
2
3# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
4
5
9
10
11import sys, os
12import logging
13
14from PyUtils import RootUtils
15ROOT = RootUtils.import_root()
16from ROOT import TFile, TTree, TDirectory, TStopwatch
17try:
18 from ROOT import RNTupleReader
19except ImportError:
20 from ROOT.Experimental import RNTupleReader
21from PyUtils.PoolFile import isRNTuple
22
23msg = logging.getLogger(__name__)
24
25def checkBranch(branch):
26
27 msg.debug('Checking branch %s ...', branch.GetName())
28
29 nBaskets=branch.GetWriteBasket()
30
31 msg.debug('Checking %s baskets ...', nBaskets)
32
33 for iBasket in range(nBaskets):
34 basket=branch.GetBasket(iBasket)
35 if not basket:
36 msg.warning('Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
37 return 1
38
39 listOfSubBranches=branch.GetListOfBranches()
40 msg.debug('Checking %s subbranches ...', listOfSubBranches.GetEntries())
41 for subBranch in listOfSubBranches:
42 if checkBranch(subBranch)==1:
43 return 1
44
45 msg.debug('Branch %s looks ok.', branch.GetName())
46 return 0
47
48
50
51 listOfBranches=tree.GetListOfBranches()
52
53 msg.debug('Checking %s branches ...', listOfBranches.GetEntries())
54
55 for branch in listOfBranches:
56 if checkBranch(branch)==1:
57 msg.warning('Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
58 return 1
59
60 return 0
61
62
63def checkTreeEventWise(tree, printInterval = 150000):
64
65 nEntries=tree.GetEntries()
66
67 msg.debug('Checking %s entries ...', nEntries)
68
69 for i in range(nEntries):
70 if tree.GetEntry(i)<0:
71 msg.warning('Event %s of tree %s is corrupted.', i, tree.GetName())
72 return 1
73
74 # Show a sign of life for long validation jobs: ATLASJT-433
75 if (i%printInterval)==0 and i>0:
76 msg.info('Validated %s events so far ...', i)
77
78 return 0
79
80def checkNTupleEventWise(ntuple, printInterval = 150000):
81
82 try:
83 reader=RNTupleReader.Open(ntuple)
84 except Exception as err:
85 msg.warning('Could not open ntuple %s: %s', ntuple, err)
86 return 1
87
88 msg.debug('Checking %s entries ...', reader.GetNEntries())
89
90 try:
91 entry = reader.CreateEntry()
92 except AttributeError:
93 entry = reader.GetModel().CreateEntry()
94 for i in reader:
95 try:
96 reader.LoadEntry(i, entry)
97 except Exception as err:
98 msg.warning('Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
99 return 1
100
101 # Show a sign of life for long validation jobs: ATLASJT-433
102 if (i%printInterval)==0 and i>0:
103 msg.info('Validated %s events so far ...', i)
104
105 return 0
106
108 """Bulk read each top level field cluster by cluster.
109 """
110 from array import array
111 try:
112 from ROOT import RException
113 except ImportError:
114 from ROOT.Experimental import RException
115
116 try:
117 reader=RNTupleReader.Open(ntuple)
118 except Exception as err:
119 msg.warning('Could not open ntuple %r: %r', ntuple, err)
120 return 1
121
122 try:
123 descriptor = reader.GetDescriptor()
124 msg.debug(f"ntupleName={descriptor.GetName()}")
125
126 model = reader.GetModel()
127 try:
128 fieldZero = model.GetFieldZero()
129 except AttributeError:
130 # ROOT Version: 6.35.01
131 fieldZero = model.GetConstFieldZero()
132 try:
133 subFields = fieldZero.GetSubFields()
134 except AttributeError:
135 subFields = fieldZero.GetConstSubfields()
136 msg.debug(f"Top level fields number {subFields.size()}")
137 for field in subFields:
138 msg.debug(f"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
139 bulk = model.CreateBulk(field.GetFieldName())
140
141 for clusterDescriptor in descriptor.GetClusterIterable():
142 try:
143 clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
144 except AttributeError:
145 # ROOT Version: 6.35.01
146 clusterIndex = ROOT.RNTupleLocalIndex(clusterDescriptor.GetId(), 0)
147 size = int(clusterDescriptor.GetNEntries())
148 maskReq = array('b', (True for i in range(size)))
149 msg.debug(f" cluster #{clusterIndex.GetClusterId()}"
150 f" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
151 f" nEntries={size}")
152 values = bulk.ReadBulk(clusterIndex, maskReq, size)
153 msg.debug(f" values array at {values}")
154
155 except RException as err:
156 from traceback import format_exception
157 msg.error("Exception reading ntuple %r\n%s", ntuple, "".join(format_exception(err)))
158 return 1
159
160 return 0
161
162def checkDirectory(directory, the_type, requireTree, depth):
163
164 from PyUtils import PoolFile
165 nentries = None
166 hasMetadata = False
167
168 msg.debug('Checking directory %s ...', directory.GetName())
169
170 listOfKeys=directory.GetListOfKeys()
171
172 msg.debug('Checking %s keys ... ', listOfKeys.GetEntries())
173
174 for key in listOfKeys:
175
176 msg.debug('Looking at key %s ...', key.GetName())
177 msg.debug('Key is of class %s.', key.GetClassName())
178
179 the_object=directory.Get(key.GetName())
180 if not the_object:
181 msg.warning("Can't get object of key %s.", key.GetName())
182 return 1
183
184 if requireTree and not isinstance(the_object, TTree):
185 msg.warning("Object of key %s is not of class TTree!", key.GetName())
186 return 1
187
188 if isinstance(the_object,TTree):
189
190 msg.debug('Checking tree %s ...', the_object.GetName())
191
192 if depth == 0:
193 if PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
194 nentries = the_object.GetEntries()
195 msg.debug(f' contains {nentries} events')
196 elif PoolFile.PoolOpts.TTreeNames.MetaData == the_object.GetName():
197 hasMetadata = True
198 msg.debug(' contains MetaData')
199
200 if the_type=='event':
201 if checkTreeEventWise(the_object)==1:
202 return 1
203 elif the_type=='basket':
204 if checkTreeBasketWise(the_object)==1:
205 return 1
206
207 msg.debug('Tree %s looks ok.', the_object.GetName())
208
209 if isRNTuple(the_object):
210
211 msg.debug('Checking ntuple of key %s ...', key.GetName())
212
213 try:
214 reader=RNTupleReader.Open(the_object)
215 except Exception as err:
216 msg.warning('Could not open ntuple %s: %s', the_object, err)
217 return 1
218
219 if depth == 0:
220 if PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
221 nentries = reader.GetNEntries()
222 msg.debug(f' contains {nentries} events')
223 elif PoolFile.PoolOpts.RNTupleNames.MetaData == reader.GetDescriptor().GetName():
224 hasMetadata = True
225 msg.debug(' contains MetaData')
226
227 if the_type=='event':
228 if checkNTupleEventWise(the_object)==1:
229 return 1
230 elif the_type=='basket':
231 if checkNTupleFieldWise(the_object)==1:
232 return 1
233
234 msg.debug('NTuple of key %s looks ok.', key.GetName())
235
236 if isinstance(the_object, TDirectory):
237 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
238 return 1
239
240 # Only check if metadata object is available as in standard POOL files
241 if depth == 0 and hasMetadata and checkNEvents(directory.GetName(), nentries)==1:
242 return 1
243 else:
244 msg.debug('Directory %s looks ok.', directory.GetName())
245 return 0
246
247
248def checkFile(fileName, the_type, requireTree):
249
250 msg.info('Checking file %s ...', fileName)
251
252 enabledIMT = False
253 if not ROOT.ROOT.IsImplicitMTEnabled() and 'TRF_MULTITHREADED_VALIDATION' in os.environ and 'ATHENA_CORE_NUMBER' in os.environ:
254 if (nThreads := int(os.environ['ATHENA_CORE_NUMBER'])) >= 0:
255 msg.info(f"Setting the number of implicit ROOT threads to {nThreads}")
256 ROOT.ROOT.EnableImplicitMT(nThreads)
257 enabledIMT = True
258 else:
259 msg.warning(f"Ignored negative ATHENA_CORE_NUMBER ({nThreads})")
260
261 file_handle=TFile.Open(fileName)
262
263 if not file_handle:
264 msg.warning("Can't access file %s.", fileName)
265 return 1
266
267 if not file_handle.IsOpen():
268 msg.warning("Can't open file %s.", fileName)
269 return 1
270
271 if file_handle.IsZombie():
272 msg.warning("File %s is a zombie.", fileName)
273 file_handle.Close()
274 return 1
275
276 if file_handle.TestBit(TFile.kRecovered):
277 msg.warning("File %s needed to be recovered.", fileName)
278 file_handle.Close()
279 return 1
280
281 if checkDirectory(file_handle, the_type, requireTree, 0)==1:
282 msg.warning("File %s is corrupted.", fileName)
283 file_handle.Close()
284 return 1
285
286 file_handle.Close()
287 msg.info("File %s looks ok.", fileName)
288
289 if enabledIMT:
290 ROOT.ROOT.DisableImplicitMT()
291
292 return 0
293
294
295def checkNEvents(fileName, nEntries):
296 """Check consistency of number of events in file with metadata.
297
298 fileName name of file to check consistency of
299 nEntries number of events in fileName (e.g., obtained by examining event data object)
300 return 0 in case of consistency, 1 otherwise
301 """
302 from PyUtils.MetaReader import read_metadata
303
304 msg.debug('Checking number of events in file %s ...', fileName)
305
306 meta = read_metadata(fileName, mode='lite')[fileName]
307 msg.debug(' according to metadata: {0}'.format(meta["nentries"]))
308 msg.debug(' according to event data: {0}'.format(nEntries))
309 if meta["nentries"] and nEntries and meta["nentries"] != nEntries \
310 or meta["nentries"] and not nEntries \
311 or not meta["nentries"] and nEntries:
312 msg.warning(f' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
313 return 1
314 else:
315 msg.debug(" looks ok.")
316 return 0
317
318def usage():
319 print("Usage: validate filename type requireTree verbosity")
320 print("'type' must be either 'event' or 'basket'")
321 print("'requireTree' must be either 'true' or 'false'")
322 print("'verbosity' must be either 'on' or 'off'")
323
324 return 2
325
326
327def main(argv):
328
329 clock=TStopwatch()
330
331 argc=len(argv)
332
333 if (argc!=5):
334 return usage()
335
336 fileName=argv[1]
337 the_type=argv[2]
338 requireTree=argv[3]
339 verbosity=argv[4]
340
341
342 if the_type!="event" and the_type!="basket":
343 return usage()
344
345 if requireTree=="true":
346 requireTree=True
347 elif requireTree=="false":
348 requireTree=False
349 else:
350 return usage()
351
352 if verbosity=="on":
353 msg.setLevel(logging.DEBUG)
354 elif verbosity=="off":
355 msg.setLevel(logging.INFO)
356 else:
357 return usage()
358
359 rc=checkFile(fileName,the_type, requireTree)
360 msg.debug('Returning %s', rc)
361
362 clock.Stop()
363 clock.Print()
364
365 return rc
366
367
368if __name__ == '__main__':
369
370 ch=logging.StreamHandler(sys.stdout)
371 formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
372 ch.setFormatter(formatter)
373 msg.addHandler(ch)
374
375 rc=main(sys.argv)
376 sys.exit(rc)
377
void print(char *figname, TCanvas *c1)
STL class.
checkNTupleEventWise(ntuple, printInterval=150000)
checkDirectory(directory, the_type, requireTree, depth)
checkTreeEventWise(tree, printInterval=150000)