ATLAS Offline Software
trfValidateRootFile.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
4 
5 
9 
10 
11 import sys, os
12 import logging
13 
14 from PyUtils import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT import TFile, TTree, TDirectory, TStopwatch
17 from ROOT.Experimental import RNTupleReader
18 from PyUtils.PoolFile import isRNTuple
19 
20 msg = logging.getLogger(__name__)
21 
def checkBranch(branch):
    """Check that all baskets of a branch, and of its sub-branches, can be fetched.

    branch  branch object to inspect (sub-branches are visited recursively)
    return  0 if every basket was retrieved, 1 as soon as one could not be
    """
    branchName = branch.GetName()
    msg.debug('Checking branch %s ...', branchName)

    # The value reported by GetWriteBasket() is used as the number of
    # baskets to probe: indices 0 .. basketCount-1.
    basketCount = branch.GetWriteBasket()
    msg.debug('Checking %s baskets ...', basketCount)

    for index in range(basketCount):
        if branch.GetBasket(index):
            continue
        # A falsy basket is treated as corruption.
        msg.warning('Basket %s of branch %s is corrupted.', index, branchName)
        return 1

    children = branch.GetListOfBranches()
    msg.debug('Checking %s subbranches ...', children.GetEntries())
    # any() stops at the first failing sub-branch, like an early return.
    if any(checkBranch(child) == 1 for child in children):
        return 1

    msg.debug('Branch %s looks ok.', branchName)
    return 0
44 
45 
def checkTreeBasketWise(tree):
    """Check a tree by validating every one of its branches with checkBranch.

    tree    tree object whose branches are inspected
    return  0 if every branch passed, 1 as soon as one branch is reported bad
    """
    listOfBranches = tree.GetListOfBranches()

    msg.debug('Checking %s branches ...', listOfBranches.GetEntries())

    for branch in listOfBranches:
        if checkBranch(branch) == 1:
            msg.warning('Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
            return 1

    return 0
58 
59 
def checkTreeEventWise(tree, printInterval = 150000):
    """Check a tree by reading every entry in turn.

    tree           tree object to read
    printInterval  emit a progress message every this many entries; a falsy
                   value (0 or None) switches the progress messages off
    return         0 if every entry was read, 1 as soon as GetEntry reports
                   a negative value
    """
    nEntries = tree.GetEntries()

    msg.debug('Checking %s entries ...', nEntries)

    for i in range(nEntries):
        if tree.GetEntry(i) < 0:
            msg.warning('Event %s of tree %s is corrupted.', i, tree.GetName())
            return 1

        # Show a sign of life for long validation jobs: ATLASJT-433
        # (printInterval is tested first so that 0 cannot reach the modulo)
        if printInterval and i > 0 and i % printInterval == 0:
            msg.info('Validated %s events so far ...', i)

    return 0
76 
def checkNTupleEventWise(ntuple, printInterval = 150000):
    """Check an ntuple by loading every entry in turn.

    ntuple         object handed to RNTupleReader.Open
    printInterval  emit a progress message every this many entries; a falsy
                   value (0 or None) switches the progress messages off
    return         0 if every entry was loaded, 1 if the ntuple could not be
                   opened or loading an entry raised an exception
    """
    try:
        reader = RNTupleReader.Open(ntuple)
    except Exception as err:
        msg.warning('Could not open ntuple %s: %s', ntuple, err)
        return 1

    msg.debug('Checking %s entries ...', reader.GetNEntries())

    # The entry object is created on the first iteration only and reused for
    # all later LoadEntry calls. Creating it lazily keeps the behaviour for
    # an ntuple without entries (no entry object is ever requested).
    entry = None

    for i in reader:
        try:
            if entry is None:
                try:
                    entry = reader.CreateEntry()
                except AttributeError:
                    # Reader without CreateEntry: ask the model instead
                    entry = reader.GetModel().CreateEntry()
            reader.LoadEntry(i, entry)
        except Exception as err:
            msg.warning('Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
            return 1

        # Show a sign of life for long validation jobs: ATLASJT-433
        # (printInterval is tested first so that 0 cannot reach the modulo)
        if printInterval and i > 0 and i % printInterval == 0:
            msg.info('Validated %s events so far ...', i)

    return 0
103 
def checkNTupleFieldWise(ntuple):
    """Bulk read each top level field cluster by cluster.

    ntuple  object handed to RNTupleReader.Open
    return  0 if every bulk read completed, 1 if the ntuple could not be
            opened or an RException was raised while reading
    """
    from array import array
    # RException is looked up in ROOT first, then in ROOT.Experimental
    try:
        from ROOT import RException
    except ImportError:
        from ROOT.Experimental import RException

    try:
        reader = RNTupleReader.Open(ntuple)
    except Exception as err:
        msg.warning('Could not open ntuple %r: %r', ntuple, err)
        return 1

    try:
        descriptor = reader.GetDescriptor()
        msg.debug("ntupleName=%s", descriptor.GetName())

        model = reader.GetModel()
        fieldZero = model.GetFieldZero()
        subFields = fieldZero.GetSubFields()
        msg.debug("Top level fields number %s", subFields.size())
        for field in subFields:
            msg.debug("fieldName=%s typeName=%s", field.GetFieldName(), field.GetTypeName())
            bulk = model.CreateBulk(field.GetFieldName())

            for clusterDescriptor in descriptor.GetClusterIterable():
                # Index built from the cluster id and 0 as second argument
                clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
                size = int(clusterDescriptor.GetNEntries())
                # Request mask with one set flag per entry of the cluster
                maskReq = array('b', [1]) * size
                msg.debug(" cluster #%s firstEntryIndex=%s nEntries=%s",
                          clusterIndex.GetClusterId(),
                          clusterDescriptor.GetFirstEntryIndex(),
                          size)
                values = bulk.ReadBulk(clusterIndex, maskReq, size)
                msg.debug(" values array at %s", values)

    except RException as err:
        from traceback import format_exception
        msg.error("Exception reading ntuple %r\n%s", ntuple, "".join(format_exception(err)))
        return 1

    return 0
147 
def checkDirectory(directory, the_type, requireTree, depth):
    """Check every key of a directory, descending into sub-directories.

    directory    directory object whose keys are inspected
    the_type     'event' or 'basket'; selects which tree / ntuple check runs
    requireTree  if true, any object that is not a TTree is an error
    depth        nesting level; event and metadata bookkeeping and the final
                 checkNEvents call only happen at depth 0
    return       0 if everything passed, 1 on the first failure
    """
    from PyUtils import PoolFile

    eventCount = None
    sawMetadata = False
    topLevel = depth == 0

    msg.debug('Checking directory %s ...', directory.GetName())

    keys = directory.GetListOfKeys()

    msg.debug('Checking %s keys ... ', keys.GetEntries())

    for key in keys:

        msg.debug('Looking at key %s ...', key.GetName())
        msg.debug('Key is of class %s.', key.GetClassName())

        obj = directory.Get(key.GetName())
        if not obj:
            msg.warning("Can't get object of key %s.", key.GetName())
            return 1

        objIsTree = isinstance(obj, TTree)
        if requireTree and not objIsTree:
            msg.warning("Object of key %s is not of class TTree!", key.GetName())
            return 1

        if objIsTree:

            msg.debug('Checking tree %s ...', obj.GetName())

            if topLevel:
                treeNames = PoolFile.PoolOpts.TTreeNames
                if treeNames.EventData == obj.GetName():
                    eventCount = obj.GetEntries()
                    msg.debug(f' contains {eventCount} events')
                elif treeNames.MetaData == obj.GetName():
                    sawMetadata = True
                    msg.debug(' contains MetaData')

            # Any other value of the_type performs no content check
            if the_type == 'event':
                treeStatus = checkTreeEventWise(obj)
            elif the_type == 'basket':
                treeStatus = checkTreeBasketWise(obj)
            else:
                treeStatus = 0
            if treeStatus == 1:
                return 1

            msg.debug('Tree %s looks ok.', obj.GetName())

        if isRNTuple(obj):

            msg.debug('Checking ntuple of key %s ...', key.GetName())

            try:
                reader = RNTupleReader.Open(obj)
            except Exception as err:
                msg.warning('Could not open ntuple %s: %s', obj, err)
                return 1

            if topLevel:
                ntupleNames = PoolFile.PoolOpts.RNTupleNames
                if ntupleNames.EventData == reader.GetDescriptor().GetName():
                    eventCount = reader.GetNEntries()
                    msg.debug(f' contains {eventCount} events')
                elif ntupleNames.MetaData == reader.GetDescriptor().GetName():
                    sawMetadata = True
                    msg.debug(' contains MetaData')

            # Any other value of the_type performs no content check
            if the_type == 'event':
                ntupleStatus = checkNTupleEventWise(obj)
            elif the_type == 'basket':
                ntupleStatus = checkNTupleFieldWise(obj)
            else:
                ntupleStatus = 0
            if ntupleStatus == 1:
                return 1

            msg.debug('NTuple of key %s looks ok.', key.GetName())

        if isinstance(obj, TDirectory):
            if checkDirectory(obj, the_type, requireTree, depth + 1) == 1:
                return 1

    # Only check if metadata object is available as in standard POOL files
    if topLevel and sawMetadata and checkNEvents(directory.GetName(), eventCount) == 1:
        return 1

    msg.debug('Directory %s looks ok.', directory.GetName())
    return 0
232 
233 
def checkFile(fileName, the_type, requireTree):
    """Open a ROOT file and validate its content.

    fileName     name of the file passed to TFile.Open
    the_type     'event' or 'basket', forwarded to checkDirectory
    requireTree  forwarded to checkDirectory
    return       0 if the file passed all checks, 1 otherwise

    Implicit multi-threading is switched on for the duration of the check
    when it is not already enabled and both TRF_MULTITHREADED_VALIDATION and
    ATHENA_CORE_NUMBER are present in the environment. It is switched off
    again on every exit path, and the file is closed on every exit path.
    """
    msg.info('Checking file %s ...', fileName)

    # Decide once whether this call owns the implicit-MT setting, so that the
    # enabling here and the disabling in the finally block cannot disagree.
    enableIMT = (not ROOT.ROOT.IsImplicitMTEnabled()
                 and 'TRF_MULTITHREADED_VALIDATION' in os.environ
                 and 'ATHENA_CORE_NUMBER' in os.environ)
    if enableIMT:
        nThreads = int(os.environ['ATHENA_CORE_NUMBER'])
        msg.info("Setting the number of implicit ROOT threads to %s", nThreads)
        ROOT.ROOT.EnableImplicitMT(nThreads)

    file_handle = None
    try:
        file_handle = TFile.Open(fileName)

        if not file_handle:
            msg.warning("Can't access file %s.", fileName)
            return 1

        if not file_handle.IsOpen():
            msg.warning("Can't open file %s.", fileName)
            return 1

        if file_handle.IsZombie():
            msg.warning("File %s is a zombie.", fileName)
            return 1

        if file_handle.TestBit(TFile.kRecovered):
            msg.warning("File %s needed to be recovered.", fileName)
            return 1

        if checkDirectory(file_handle, the_type, requireTree, 0) == 1:
            msg.warning("File %s is corrupted.", fileName)
            return 1

        msg.info("File %s looks ok.", fileName)
        return 0

    finally:
        # Runs for every return above and for exceptions raised by the checks
        if file_handle and file_handle.IsOpen():
            file_handle.Close()
        if enableIMT:
            ROOT.ROOT.DisableImplicitMT()
276 
277 
def checkNEvents(fileName, nEntries):
    """Compare the event count of a file with the count stored in its metadata.

    fileName name of the file whose metadata is read
    nEntries number of events in fileName (e.g., obtained by examining event data object)
    return 0 if both counts agree (or both are empty/zero), 1 otherwise
    """
    from PyUtils.MetaReader import read_metadata

    msg.debug('Checking number of events in file %s ...', fileName)

    metaEntries = read_metadata(fileName, mode='lite')[fileName]["nentries"]
    msg.debug(' according to metadata: {0}'.format(metaEntries))
    msg.debug(' according to event data: {0}'.format(nEntries))

    if metaEntries and nEntries:
        # Both counts are present: they have to be equal
        inconsistent = metaEntries != nEntries
    else:
        # At most one count is present: consistent only if neither is
        inconsistent = bool(metaEntries) or bool(nEntries)

    if not inconsistent:
        msg.debug(" looks ok.")
        return 0

    msg.warning(f' number of events ({nEntries}) inconsistent with metadata ({metaEntries}) in file {fileName!r}.')
    return 1
300 
def usage():
    """Print the command line synopsis to standard output.

    return 2, which main() hands back as exit code for invalid arguments
    """
    helpLines = (
        "Usage: validate filename type requireTree verbosity",
        "'type' must be either 'event' or 'basket'",
        "'requireTree' must be either 'true' or 'false'",
        "'verbosity' must be either 'on' or 'off'",
    )
    for line in helpLines:
        print(line)

    return 2
308 
309 
def main(argv):
    """Parse the command line, run checkFile and report the elapsed time.

    argv    argument list: program name, file name, type ('event' or
            'basket'), requireTree ('true' or 'false'), verbosity ('on' or 'off')
    return  the value returned by usage() for invalid arguments, otherwise
            the return code of checkFile
    """
    clock = TStopwatch()

    if len(argv) != 5:
        return usage()

    fileName, the_type, requireTree, verbosity = argv[1:5]

    if the_type not in ("event", "basket"):
        return usage()

    if requireTree not in ("true", "false"):
        return usage()
    requireTree = requireTree == "true"

    if verbosity not in ("on", "off"):
        return usage()
    msg.setLevel(logging.DEBUG if verbosity == "on" else logging.INFO)

    rc = checkFile(fileName, the_type, requireTree)
    msg.debug('Returning %s', rc)

    clock.Stop()
    clock.Print()

    return rc
349 
350 
if __name__ == '__main__':

    # Send this module's log records to stdout with a timestamped format
    consoleHandler = logging.StreamHandler(sys.stdout)
    consoleHandler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    msg.addHandler(consoleHandler)

    # The return code of main() becomes the exit status of the process
    sys.exit(main(sys.argv))
360 
python.trfValidateRootFile.usage
def usage()
Definition: trfValidateRootFile.py:301
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:53
python.trfValidateRootFile.checkTreeBasketWise
def checkTreeBasketWise(tree)
Definition: trfValidateRootFile.py:46
python.trfValidateRootFile.checkNTupleFieldWise
def checkNTupleFieldWise(ntuple)
Definition: trfValidateRootFile.py:104
python.trfValidateRootFile.checkDirectory
def checkDirectory(directory, the_type, requireTree, depth)
Definition: trfValidateRootFile.py:148
python.trfValidateRootFile.main
def main(argv)
Definition: trfValidateRootFile.py:310
python.trfValidateRootFile.checkNEvents
def checkNEvents(fileName, nEntries)
Definition: trfValidateRootFile.py:278
python.trfValidateRootFile.checkNTupleEventWise
def checkNTupleEventWise(ntuple, printInterval=150000)
Definition: trfValidateRootFile.py:77
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
checkFile
Definition: checkFile.py:1
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
array
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.PoolFile.isRNTuple
def isRNTuple(obj)
Definition: PoolFile.py:36
ROOT::Experimental
Definition: RNTCollection.h:28
python.trfValidateRootFile.checkBranch
def checkBranch(branch)
Definition: trfValidateRootFile.py:22
python.trfValidateRootFile.checkFile
def checkFile(fileName, the_type, requireTree)
Definition: trfValidateRootFile.py:234
python.trfValidateRootFile.checkTreeEventWise
def checkTreeEventWise(tree, printInterval=150000)
Definition: trfValidateRootFile.py:60