Loading [MathJax]/extensions/tex2jax.js
ATLAS Offline Software
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
trfValidateRootFile.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
4 
5 
9 
10 
11 import sys, os
12 import logging
13 
14 from PyUtils import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT import TFile, TTree, TDirectory, TStopwatch
17 from ROOT.Experimental import RNTupleReader
18 from PyUtils.PoolFile import isRNTuple
19 
20 msg = logging.getLogger(__name__)
21 
def checkBranch(branch):
    """Check a branch, and recursively its sub-branches, basket by basket.

    The basket count is taken from ``branch.GetWriteBasket()``.  Every basket
    index below that count is fetched with ``branch.GetBasket``; a falsy
    result is reported as corruption.  The sub-branches returned by
    ``branch.GetListOfBranches()`` are then checked the same way.

    Returns 0 when the branch and all of its sub-branches pass, 1 as soon as
    a problem is found.
    """
    msg.debug('Checking branch %s ...', branch.GetName())

    basketCount = branch.GetWriteBasket()
    msg.debug('Checking %s baskets ...', basketCount)

    for index in range(basketCount):
        if branch.GetBasket(index):
            continue
        msg.warning('Basket %s of branch %s is corrupted.', index, branch.GetName())
        return 1

    children = branch.GetListOfBranches()
    msg.debug('Checking %s subbranches ...', children.GetEntries())
    # any() stops at the first failing sub-branch, like an early return would.
    if any(checkBranch(child) == 1 for child in children):
        return 1

    msg.debug('Branch %s looks ok.', branch.GetName())
    return 0
44 
45 
47 
def checkTreeBasketWise(tree):
    """Check every top-level branch of a tree basket by basket.

    Each branch from ``tree.GetListOfBranches()`` is passed to
    ``checkBranch``.  The first branch that fails is reported together with
    the tree name and ends the scan.

    Returns 0 when all branches pass, 1 when a corrupted branch is found.
    """
    listOfBranches = tree.GetListOfBranches()

    msg.debug('Checking %s branches ...', listOfBranches.GetEntries())

    for branch in listOfBranches:
        if checkBranch(branch) == 1:
            msg.warning('Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
            return 1

    return 0
58 
59 
def checkTreeEventWise(tree, printInterval = 150000):
    """Read a tree entry by entry and report the first unreadable one.

    tree           object providing GetEntries(), GetEntry(i) and GetName()
    printInterval  an info message is logged each time the entry index is a
                   positive multiple of this value

    An entry is treated as corrupted when ``tree.GetEntry(i)`` returns a
    negative value.

    Returns 0 when all entries were read, 1 on the first corrupted entry.
    """
    total = tree.GetEntries()
    msg.debug('Checking %s entries ...', total)

    for entryIndex in range(total):
        if tree.GetEntry(entryIndex) < 0:
            msg.warning('Event %s of tree %s is corrupted.', entryIndex, tree.GetName())
            return 1

        # Show a sign of life for long validation jobs: ATLASJT-433
        if entryIndex % printInterval == 0 and entryIndex > 0:
            msg.info('Validated %s events so far ...', entryIndex)

    return 0
76 
def checkNTupleEventWise(ntuple, printInterval = 150000):
    """Load every entry of an RNTuple and report the first one that fails.

    ntuple         object handed to ``RNTupleReader.Open``
    printInterval  an info message is logged each time the entry index is a
                   positive multiple of this value

    Returns 0 when all entries were loaded, 1 when the ntuple cannot be
    opened or when loading an entry raises.
    """
    try:
        reader = RNTupleReader.Open(ntuple)
    except Exception as err:
        msg.warning('Could not open ntuple %s: %s', ntuple, err)
        return 1

    msg.debug('Checking %s entries ...', reader.GetNEntries())

    # Readers without CreateEntry() get their entry from the model instead.
    try:
        entry = reader.CreateEntry()
    except AttributeError:
        entry = reader.GetModel().CreateEntry()

    for index in reader:
        try:
            reader.LoadEntry(index, entry)
        except Exception as err:
            msg.warning('Event %s of ntuple %s is corrupted: %s', index, reader.GetDescriptor().GetName(), err)
            return 1
        else:
            # Show a sign of life for long validation jobs: ATLASJT-433
            if index % printInterval == 0 and index > 0:
                msg.info('Validated %s events so far ...', index)

    return 0
103 
def checkNTupleFieldWise(ntuple):
    """Bulk read each top level field cluster by cluster.

    ntuple  object handed to ``RNTupleReader.Open``

    For every top-level field of the model a bulk reader is created, and for
    every cluster listed by the descriptor all entries of that cluster are
    requested in a single ``ReadBulk`` call.

    Returns 0 on success, 1 when the ntuple cannot be opened or when reading
    raises ``RException``.
    """
    from array import array
    # RException is looked up in ROOT first, then in ROOT.Experimental.
    try:
        from ROOT import RException
    except ImportError:
        from ROOT.Experimental import RException

    try:
        reader = RNTupleReader.Open(ntuple)
    except Exception as err:
        msg.warning('Could not open ntuple %r: %r', ntuple, err)
        return 1

    try:
        descriptor = reader.GetDescriptor()
        msg.debug(f"ntupleName={descriptor.GetName()}")

        model = reader.GetModel()
        try:
            fieldZero = model.GetFieldZero()
        except AttributeError:
            # ROOT Version: 6.35.01
            fieldZero = model.GetConstFieldZero()
        try:
            subFields = fieldZero.GetSubFields()
        except AttributeError:
            subFields = fieldZero.GetConstSubfields()
        msg.debug(f"Top level fields number {subFields.size()}")
        for field in subFields:
            msg.debug(f"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
            bulk = model.CreateBulk(field.GetFieldName())

            for clusterDescriptor in descriptor.GetClusterIterable():
                try:
                    clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
                except AttributeError:
                    # ROOT Version: 6.35.01
                    clusterIndex = ROOT.RNTupleLocalIndex(clusterDescriptor.GetId(), 0)
                size = int(clusterDescriptor.GetNEntries())
                # Request every entry of the cluster: a mask of `size` ones,
                # built by repetition rather than one element at a time.
                maskReq = array('b', [1]) * size
                msg.debug(f" cluster #{clusterIndex.GetClusterId()}"
                          f" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
                          f" nEntries={size}")
                values = bulk.ReadBulk(clusterIndex, maskReq, size)
                msg.debug(f" values array at {values}")

    except RException as err:
        from traceback import format_exception
        # The three-argument form is accepted by every Python 3 release; the
        # single-argument call only works from Python 3.10 on.
        msg.error("Exception reading ntuple %r\n%s", ntuple,
                  "".join(format_exception(type(err), err, err.__traceback__)))
        return 1

    return 0
158 
def checkDirectory(directory, the_type, requireTree, depth):
    """Validate every keyed object of a directory, descending into sub-directories.

    directory    object providing GetName(), GetListOfKeys() and Get(name)
    the_type     'event' selects the entry-wise checks, 'basket' the
                 basket-wise (TTree) or field-wise (RNTuple) checks; any
                 other value skips the content check
    requireTree  when true, any object that is not a TTree is an error
    depth        nesting level; 0 for the directory the scan started from

    At depth 0 the entry count of the event-data object and the presence of
    a metadata object (names taken from ``PoolFile.PoolOpts``) are recorded,
    and when metadata is present the count is cross-checked with
    ``checkNEvents``.

    Returns 0 when everything passes, 1 on the first failure.
    """
    from PyUtils import PoolFile

    nentries = None
    hasMetadata = False
    atTopLevel = depth == 0

    msg.debug('Checking directory %s ...', directory.GetName())

    keys = directory.GetListOfKeys()
    msg.debug('Checking %s keys ... ', keys.GetEntries())

    for key in keys:

        msg.debug('Looking at key %s ...', key.GetName())
        msg.debug('Key is of class %s.', key.GetClassName())

        the_object = directory.Get(key.GetName())
        if not the_object:
            msg.warning("Can't get object of key %s.", key.GetName())
            return 1

        objectIsTree = isinstance(the_object, TTree)

        if requireTree and not objectIsTree:
            msg.warning("Object of key %s is not of class TTree!", key.GetName())
            return 1

        if objectIsTree:

            msg.debug('Checking tree %s ...', the_object.GetName())

            if atTopLevel:
                treeNames = PoolFile.PoolOpts.TTreeNames
                if treeNames.EventData == the_object.GetName():
                    nentries = the_object.GetEntries()
                    msg.debug(f' contains {nentries} events')
                elif treeNames.MetaData == the_object.GetName():
                    hasMetadata = True
                    msg.debug(' contains MetaData')

            # Pick the check matching the requested type; unknown types pass.
            if the_type == 'event':
                status = checkTreeEventWise(the_object)
            elif the_type == 'basket':
                status = checkTreeBasketWise(the_object)
            else:
                status = 0
            if status == 1:
                return 1

            msg.debug('Tree %s looks ok.', the_object.GetName())

        if isRNTuple(the_object):

            msg.debug('Checking ntuple of key %s ...', key.GetName())

            try:
                reader = RNTupleReader.Open(the_object)
            except Exception as err:
                msg.warning('Could not open ntuple %s: %s', the_object, err)
                return 1

            if atTopLevel:
                ntupleNames = PoolFile.PoolOpts.RNTupleNames
                if ntupleNames.EventData == reader.GetDescriptor().GetName():
                    nentries = reader.GetNEntries()
                    msg.debug(f' contains {nentries} events')
                elif ntupleNames.MetaData == reader.GetDescriptor().GetName():
                    hasMetadata = True
                    msg.debug(' contains MetaData')

            if the_type == 'event':
                status = checkNTupleEventWise(the_object)
            elif the_type == 'basket':
                status = checkNTupleFieldWise(the_object)
            else:
                status = 0
            if status == 1:
                return 1

            msg.debug('NTuple of key %s looks ok.', key.GetName())

        if isinstance(the_object, TDirectory):
            if checkDirectory(the_object, the_type, requireTree, depth + 1) == 1:
                return 1

    # Only check if metadata object is available as in standard POOL files
    if atTopLevel and hasMetadata and checkNEvents(directory.GetName(), nentries) == 1:
        return 1

    msg.debug('Directory %s looks ok.', directory.GetName())
    return 0
243 
244 
def checkFile(fileName, the_type, requireTree):
    """Open a ROOT file and validate its content.

    fileName     name handed to ``TFile.Open``
    the_type     'event' or 'basket', forwarded to ``checkDirectory``
    requireTree  forwarded to ``checkDirectory``

    If implicit multithreading is not already enabled and both the
    TRF_MULTITHREADED_VALIDATION and ATHENA_CORE_NUMBER environment
    variables are set, implicit multithreading is enabled with
    ATHENA_CORE_NUMBER threads for the duration of the check (negative
    values are ignored with a warning).

    Returns 0 when the file looks ok, 1 when it cannot be opened, is a
    zombie, had to be recovered, or fails the directory check.
    """
    msg.info('Checking file %s ...', fileName)

    enabledIMT = False
    if not ROOT.ROOT.IsImplicitMTEnabled() and 'TRF_MULTITHREADED_VALIDATION' in os.environ and 'ATHENA_CORE_NUMBER' in os.environ:
        if (nThreads := int(os.environ['ATHENA_CORE_NUMBER'])) >= 0:
            msg.info(f"Setting the number of implicit ROOT threads to {nThreads}")
            ROOT.ROOT.EnableImplicitMT(nThreads)
            enabledIMT = True
        else:
            msg.warning(f"Ignored negative ATHENA_CORE_NUMBER ({nThreads})")

    try:
        file_handle = TFile.Open(fileName)

        if not file_handle:
            msg.warning("Can't access file %s.", fileName)
            return 1

        if not file_handle.IsOpen():
            msg.warning("Can't open file %s.", fileName)
            return 1

        if file_handle.IsZombie():
            msg.warning("File %s is a zombie.", fileName)
            file_handle.Close()
            return 1

        if file_handle.TestBit(TFile.kRecovered):
            msg.warning("File %s needed to be recovered.", fileName)
            file_handle.Close()
            return 1

        if checkDirectory(file_handle, the_type, requireTree, 0) == 1:
            msg.warning("File %s is corrupted.", fileName)
            file_handle.Close()
            return 1

        file_handle.Close()
        msg.info("File %s looks ok.", fileName)
        return 0
    finally:
        # Undo our own EnableImplicitMT on every exit path, including the
        # failure returns and exceptions, so the setting does not leak into
        # whatever runs next in this process.
        if enabledIMT:
            ROOT.ROOT.DisableImplicitMT()
290 
291 
def checkNEvents(fileName, nEntries):
    """Check consistency of number of events in file with metadata.

    fileName  name of file to check consistency of
    nEntries  number of events in fileName (e.g., obtained by examining event data object)
    return    0 in case of consistency, 1 otherwise

    The two counts agree when both are falsy (e.g. None or 0) or when both
    are truthy and equal.
    """
    from PyUtils.MetaReader import read_metadata

    msg.debug('Checking number of events in file %s ...', fileName)

    meta = read_metadata(fileName, mode='lite')[fileName]
    msg.debug(' according to metadata: {0}'.format(meta["nentries"]))
    msg.debug(' according to event data: {0}'.format(nEntries))

    if meta["nentries"] and nEntries:
        # Both counts are present: they have to match.
        mismatch = meta["nentries"] != nEntries
    else:
        # At least one is missing/zero: consistent only if both are.
        mismatch = bool(meta["nentries"]) or bool(nEntries)

    if not mismatch:
        msg.debug(" looks ok.")
        return 0

    msg.warning(f' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
    return 1
314 
def usage():
    """Print the command-line synopsis to stdout and return exit code 2."""
    helpLines = (
        "Usage: validate filename type requireTree verbosity",
        "'type' must be either 'event' or 'basket'",
        "'requireTree' must be either 'true' or 'false'",
        "'verbosity' must be either 'on' or 'off'",
    )
    for line in helpLines:
        print(line)

    return 2
322 
323 
def main(argv):
    """Parse the command line, run the file check and return its status.

    argv  full argument vector: program name followed by exactly four
          arguments (filename, type, requireTree, verbosity)

    Returns the result of ``usage()`` when the arguments are invalid,
    otherwise the return code of ``checkFile``.  The elapsed time is printed
    through a TStopwatch after a completed check.
    """
    clock = TStopwatch()

    if len(argv) != 5:
        return usage()

    fileName, the_type, requireTree, verbosity = argv[1:5]

    if the_type not in ("event", "basket"):
        return usage()

    if requireTree not in ("true", "false"):
        return usage()
    requireTree = requireTree == "true"

    if verbosity not in ("on", "off"):
        return usage()
    msg.setLevel(logging.DEBUG if verbosity == "on" else logging.INFO)

    rc = checkFile(fileName, the_type, requireTree)
    msg.debug('Returning %s', rc)

    clock.Stop()
    clock.Print()

    return rc
363 
364 
if __name__ == '__main__':

    # Log to stdout with timestamp, logger name and level in front of each message.
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )
    msg.addHandler(handler)

    # The validation result becomes the process exit status.
    sys.exit(main(sys.argv))
374 
python.trfValidateRootFile.usage
def usage()
Definition: trfValidateRootFile.py:315
vtune_athena.format
format
Definition: vtune_athena.py:14
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:53
python.trfValidateRootFile.checkTreeBasketWise
def checkTreeBasketWise(tree)
Definition: trfValidateRootFile.py:46
python.trfValidateRootFile.checkNTupleFieldWise
def checkNTupleFieldWise(ntuple)
Definition: trfValidateRootFile.py:104
python.trfValidateRootFile.checkDirectory
def checkDirectory(directory, the_type, requireTree, depth)
Definition: trfValidateRootFile.py:159
python.trfValidateRootFile.main
def main(argv)
Definition: trfValidateRootFile.py:324
python.trfValidateRootFile.checkNEvents
def checkNEvents(fileName, nEntries)
Definition: trfValidateRootFile.py:292
python.LArMinBiasAlgConfig.int
int
Definition: LArMinBiasAlgConfig.py:59
python.trfValidateRootFile.checkNTupleEventWise
def checkNTupleEventWise(ntuple, printInterval=150000)
Definition: trfValidateRootFile.py:77
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
checkFile
Definition: checkFile.py:1
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
array
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.PoolFile.isRNTuple
def isRNTuple(obj)
Definition: PoolFile.py:36
ROOT::Experimental
Definition: RNTCollection.h:26
python.trfValidateRootFile.checkBranch
def checkBranch(branch)
Definition: trfValidateRootFile.py:22
python.trfValidateRootFile.checkFile
def checkFile(fileName, the_type, requireTree)
Definition: trfValidateRootFile.py:245
python.trfValidateRootFile.checkTreeEventWise
def checkTreeEventWise(tree, printInterval=150000)
Definition: trfValidateRootFile.py:60