ATLAS Offline Software
trfValidateRootFile.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
4 
5 
9 
10 
11 import sys, os
12 import logging
13 
14 from PyUtils import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT import TFile, TTree, TDirectory, TStopwatch
17 from ROOT.Experimental import RNTuple, RNTupleReader
18 
19 msg = logging.getLogger(__name__)
20 
def checkBranch(branch):
    """Check that the baskets of a branch and of all its sub-branches can be fetched.

    branch       branch object providing GetName(), GetWriteBasket(),
                 GetBasket(i) and GetListOfBranches()
    return       0 if every basket could be fetched, 1 as soon as one is missing
    """
    msg.debug('Checking branch %s ...', branch.GetName())

    basketCount = branch.GetWriteBasket()
    msg.debug('Checking %s baskets ...', basketCount)

    for index in range(basketCount):
        # A basket that cannot be fetched evaluates to false and is reported as corrupted
        if not branch.GetBasket(index):
            msg.warning('Basket %s of branch %s is corrupted.', index, branch.GetName() )
            return 1

    children = branch.GetListOfBranches()
    msg.debug('Checking %s subbranches ...', children.GetEntries())

    # any() stops at the first failing sub-branch, like the explicit early return
    if any(checkBranch(child) == 1 for child in children):
        return 1

    msg.debug('Branch %s looks ok.', branch.GetName())
    return 0
43 
44 
def checkTreeBasketWise(tree):
    """Check a tree by running checkBranch on each of its top-level branches.

    tree         tree object providing GetListOfBranches() and GetName()
    return       0 if every branch passes checkBranch, 1 on the first failing branch
    """
    listOfBranches = tree.GetListOfBranches()

    msg.debug('Checking %s branches ...', listOfBranches.GetEntries())

    for branch in listOfBranches:
        if checkBranch(branch) == 1:
            msg.warning('Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
            return 1

    return 0
57 
58 
def checkTreeEventWise(tree, printInterval = 150000):
    """Check a tree by reading its entries one by one.

    tree            tree object providing GetEntries(), GetEntry(i) and GetName()
    printInterval   number of entries between two progress messages
    return          0 if every entry could be read, 1 on the first entry
                    for which GetEntry returns a negative value
    """
    total = tree.GetEntries()

    msg.debug('Checking %s entries ...', total)

    for entry in range(total):
        status = tree.GetEntry(entry)
        if status < 0:
            msg.warning('Event %s of tree %s is corrupted.', entry, tree.GetName())
            return 1

        # Show a sign of life for long validation jobs: ATLASJT-433
        if (entry % printInterval) != 0 or entry <= 0:
            continue
        msg.info('Validated %s events so far ...', entry)

    return 0
75 
def checkNTupleEventWise(ntuple, printInterval = 150000):
    """Check an ntuple by loading its entries one by one through an RNTupleReader.

    ntuple          object handed to RNTupleReader.Open
    printInterval   number of entries between two progress messages
    return          0 if every entry could be loaded, 1 if the reader could not
                    be opened or loading an entry raised an exception
    """
    try:
        reader = RNTupleReader.Open(ntuple)
    except Exception as err:
        msg.warning('Could not open ntuple %s: %s', ntuple, err)
        return 1

    msg.debug('Checking %s entries ...', reader.GetNEntries())

    # Iterating the reader yields the values that are passed on to LoadEntry
    for entry in reader:
        try:
            reader.LoadEntry(entry)
        except Exception as err:
            msg.warning('Event %s of ntuple %s is corrupted: %s', entry, reader.GetDescriptor().GetName(), err)
            return 1
        else:
            # Show a sign of life for long validation jobs: ATLASJT-433
            if (entry % printInterval) == 0 and entry > 0:
                msg.info('Validated %s events so far ...', entry)

    return 0
98 
def checkNTupleFieldWise(ntuple):
    """Bulk read each top level field cluster by cluster.

    ntuple       object handed to RNTupleReader.Open
    return       0 if all bulk reads completed, 1 if the reader could not be
                 opened or an RException was raised while reading
    """
    from array import array

    try:
        reader=RNTupleReader.Open(ntuple)
    except Exception as err:
        msg.warning('Could not open ntuple %r: %r', ntuple, err)
        return 1

    try:
        descriptor = reader.GetDescriptor()
        msg.debug(f"ntupleName={descriptor.GetName()}")

        model = reader.GetModel()
        fieldZero = model.GetFieldZero()
        subFields = fieldZero.GetSubFields()
        msg.debug(f"Top level fields number {subFields.size()}")
        for field in subFields:
            msg.debug(f"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
            bulk = model.CreateBulk(field.GetFieldName())

            for clusterDescriptor in descriptor.GetClusterIterable():
                clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
                size = int(clusterDescriptor.GetNEntries())
                # Request every entry of the cluster: one true flag per entry
                maskReq = array('b', (True for i in range(size)))
                msg.debug(f" cluster #{clusterIndex.GetClusterId()}"
                          f" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
                          f" nEntries={size}")
                values = bulk.ReadBulk(clusterIndex, maskReq, size)
                msg.debug(f" values array at {values}")

    except ROOT.Experimental.RException as err:
        from traceback import format_exception
        # The three-argument form works on every Python 3 version; the
        # single-argument call is only accepted from Python 3.10 on.
        msg.error("Exception reading ntuple %r\n%s", ntuple,
                  "".join(format_exception(type(err), err, err.__traceback__)))
        return 1

    return 0
138 
def checkDirectory(directory, the_type, requireTree, depth):
    """Check every keyed object of a directory, descending into sub-directories.

    directory    directory object providing GetName(), GetListOfKeys() and Get(name)
    the_type     'event' selects the entry-wise checks, 'basket' the
                 basket-wise (tree) or field-wise (ntuple) checks
    requireTree  if true, any object that is not a TTree is reported as a failure
    depth        nesting level; event/metadata bookkeeping is done only at depth 0
    return       0 if everything looks ok, 1 on the first failure
    """
    from PyUtils import PoolFile

    eventCount = None
    metadataSeen = False
    atTopLevel = depth == 0

    msg.debug('Checking directory %s ...', directory.GetName())

    keys = directory.GetListOfKeys()

    msg.debug('Checking %s keys ... ', keys.GetEntries())

    for key in keys:

        msg.debug('Looking at key %s ...', key.GetName())
        msg.debug('Key is of class %s.', key.GetClassName())

        item = directory.Get(key.GetName())
        if not item:
            msg.warning("Can't get object of key %s.", key.GetName())
            return 1

        itemIsTree = isinstance(item, TTree)

        if requireTree and not itemIsTree:
            msg.warning("Object of key %s is not of class TTree!", key.GetName())
            return 1

        if itemIsTree:

            msg.debug('Checking tree %s ...', item.GetName())

            if atTopLevel:
                # Remember the event count and whether a metadata tree is present
                treeNames = PoolFile.PoolOpts.TTreeNames
                if treeNames.EventData == item.GetName():
                    eventCount = item.GetEntries()
                    msg.debug(f' contains {eventCount} events')
                elif treeNames.MetaData == item.GetName():
                    metadataSeen = True
                    msg.debug(' contains MetaData')

            if the_type == 'event':
                treeFailed = checkTreeEventWise(item) == 1
            elif the_type == 'basket':
                treeFailed = checkTreeBasketWise(item) == 1
            else:
                treeFailed = False
            if treeFailed:
                return 1

            msg.debug('Tree %s looks ok.', item.GetName())

        if isinstance(item, RNTuple):

            msg.debug('Checking ntuple of key %s ...', key.GetName())

            try:
                reader = RNTupleReader.Open(item)
            except Exception as err:
                msg.warning('Could not open ntuple %s: %s', item, err)
                return 1

            if atTopLevel:
                # Same bookkeeping as for trees, using the ntuple naming scheme
                ntupleNames = PoolFile.PoolOpts.RNTupleNames
                if ntupleNames.EventData == reader.GetDescriptor().GetName():
                    eventCount = reader.GetNEntries()
                    msg.debug(f' contains {eventCount} events')
                elif ntupleNames.MetaData == reader.GetDescriptor().GetName():
                    metadataSeen = True
                    msg.debug(' contains MetaData')

            if the_type == 'event':
                ntupleFailed = checkNTupleEventWise(item) == 1
            elif the_type == 'basket':
                ntupleFailed = checkNTupleFieldWise(item) == 1
            else:
                ntupleFailed = False
            if ntupleFailed:
                return 1

            msg.debug('NTuple of key %s looks ok.', key.GetName())

        if isinstance(item, TDirectory):
            if checkDirectory(item, the_type, requireTree, depth + 1) == 1:
                return 1

    # Only check if metadata object is available as in standard POOL files
    if atTopLevel and metadataSeen and checkNEvents(directory.GetName(), eventCount) == 1:
        return 1

    msg.debug('Directory %s looks ok.', directory.GetName())
    return 0
223 
224 
def checkFile(fileName, the_type, requireTree):
    """Open a ROOT file and validate its content with checkDirectory.

    fileName     name of the file handed to TFile.Open
    the_type     check mode forwarded to checkDirectory ('event' or 'basket')
    requireTree  forwarded to checkDirectory
    return       0 if the file looks ok, 1 if it cannot be opened, is a zombie,
                 needed recovery, or fails the directory check

    If implicit multithreading is not already enabled and both
    TRF_MULTITHREADED_VALIDATION and ATHENA_CORE_NUMBER are present in the
    environment, it is enabled for the duration of the check and disabled
    again on every exit path.
    """
    msg.info('Checking file %s ...', fileName)

    # Decide once whether this call owns the implicit-MT setting
    enabledIMTHere = (not ROOT.ROOT.IsImplicitMTEnabled()
                      and 'TRF_MULTITHREADED_VALIDATION' in os.environ
                      and 'ATHENA_CORE_NUMBER' in os.environ)
    if enabledIMTHere:
        nThreads = int(os.environ['ATHENA_CORE_NUMBER'])
        msg.info(f"Setting the number of implicit ROOT threads to {nThreads}")
        ROOT.ROOT.EnableImplicitMT(nThreads)

    try:
        file_handle = TFile.Open(fileName)

        if not file_handle:
            msg.warning("Can't access file %s.", fileName)
            return 1

        if not file_handle.IsOpen():
            msg.warning("Can't open file %s.", fileName)
            return 1

        try:
            if file_handle.IsZombie():
                msg.warning("File %s is a zombie.", fileName)
                return 1

            if file_handle.TestBit(TFile.kRecovered):
                msg.warning("File %s needed to be recovered.", fileName)
                return 1

            if checkDirectory(file_handle, the_type, requireTree, 0) == 1:
                msg.warning("File %s is corrupted.", fileName)
                return 1
        finally:
            # Close the file on success, on failure and if a check raises
            file_handle.Close()

        msg.info("File %s looks ok.", fileName)
        return 0
    finally:
        # Undo the implicit-MT setting on failure paths as well
        if enabledIMTHere:
            ROOT.ROOT.DisableImplicitMT()
267 
268 
def checkNEvents(fileName, nEntries):
    """Compare the event count of a file with the count recorded in its metadata.

    fileName     name of the file whose metadata is read (in 'lite' mode)
    nEntries     number of events found in fileName (e.g., obtained by examining event data object)
    return       0 if both counts agree or both are empty/zero, 1 otherwise
    """
    from PyUtils.MetaReader import read_metadata

    msg.debug('Checking number of events in file %s ...', fileName)

    meta = read_metadata(fileName, mode='lite')[fileName]
    msg.debug(' according to metadata: {0}'.format(meta["nentries"]))
    msg.debug(' according to event data: {0}'.format(nEntries))

    fromMeta = meta["nentries"]
    if fromMeta and nEntries:
        # Both counts are set: they must be equal
        inconsistent = fromMeta != nEntries
    else:
        # At most one count is set: inconsistent exactly when one of them is
        inconsistent = bool(fromMeta) or bool(nEntries)

    if inconsistent:
        msg.warning(f' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
        return 1

    msg.debug(" looks ok.")
    return 0
291 
def usage():
    """Print the command line synopsis and return the exit code 2."""
    helpLines = (
        "Usage: validate filename type requireTree verbosity",
        "'type' must be either 'event' or 'basket'",
        "'requireTree' must be either 'true' or 'false'",
        "'verbosity' must be either 'on' or 'off'",
    )
    for line in helpLines:
        print(line)

    return 2
299 
300 
def main(argv):
    """Parse the command line, run checkFile and report the elapsed time.

    argv         argument list: program name, file name, type ('event' or
                 'basket'), requireTree ('true' or 'false'), verbosity ('on' or 'off')
    return       result of usage() for an invalid command line, otherwise
                 the return code of checkFile
    """
    clock = TStopwatch()

    if len(argv) != 5:
        return usage()

    fileName, the_type, treeFlag, verbosity = argv[1:5]

    if the_type not in ("event", "basket"):
        return usage()

    if treeFlag not in ("true", "false"):
        return usage()
    requireTree = treeFlag == "true"

    # 'on' means debug output, 'off' means info level only
    levels = {"on": logging.DEBUG, "off": logging.INFO}
    if verbosity not in levels:
        return usage()
    msg.setLevel(levels[verbosity])

    rc = checkFile(fileName, the_type, requireTree)
    msg.debug('Returning %s', rc)

    clock.Stop()
    clock.Print()

    return rc
340 
341 
if __name__ == '__main__':

    # Send log records to stdout with a timestamped format
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    msg.addHandler(handler)

    # The return code of main becomes the exit status of the process
    sys.exit(main(sys.argv))
351 
python.trfValidateRootFile.usage
def usage()
Definition: trfValidateRootFile.py:292
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:52
python.trfValidateRootFile.checkTreeBasketWise
def checkTreeBasketWise(tree)
Definition: trfValidateRootFile.py:45
python.trfValidateRootFile.checkNTupleFieldWise
def checkNTupleFieldWise(ntuple)
Definition: trfValidateRootFile.py:99
python.trfValidateRootFile.checkDirectory
def checkDirectory(directory, the_type, requireTree, depth)
Definition: trfValidateRootFile.py:139
python.trfValidateRootFile.main
def main(argv)
Definition: trfValidateRootFile.py:301
python.trfValidateRootFile.checkNEvents
def checkNEvents(fileName, nEntries)
Definition: trfValidateRootFile.py:269
python.trfValidateRootFile.checkNTupleEventWise
def checkNTupleEventWise(ntuple, printInterval=150000)
Definition: trfValidateRootFile.py:76
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
checkFile
Definition: checkFile.py:1
array
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
dbg::print
void print(std::FILE *stream, std::format_string< Args... > fmt, Args &&... args)
Definition: SGImplSvc.cxx:70
ROOT::Experimental
Definition: RNTCollection.h:28
python.trfValidateRootFile.checkBranch
def checkBranch(branch)
Definition: trfValidateRootFile.py:21
python.trfValidateRootFile.checkFile
def checkFile(fileName, the_type, requireTree)
Definition: trfValidateRootFile.py:225
python.trfValidateRootFile.checkTreeEventWise
def checkTreeEventWise(tree, printInterval=150000)
Definition: trfValidateRootFile.py:59