14 from PyUtils
import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT
import TFile, TTree, TDirectory, TStopwatch
18 from PyUtils.PoolFile
import isRNTuple
20 msg = logging.getLogger(__name__)
24 msg.debug(
'Checking branch %s ...', branch.GetName())
26 nBaskets=branch.GetWriteBasket()
28 msg.debug(
'Checking %s baskets ...', nBaskets)
30 for iBasket
in range(nBaskets):
31 basket=branch.GetBasket(iBasket)
33 msg.warning(
'Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
36 listOfSubBranches=branch.GetListOfBranches()
37 msg.debug(
'Checking %s subbranches ...', listOfSubBranches.GetEntries())
38 for subBranch
in listOfSubBranches:
42 msg.debug(
'Branch %s looks ok.', branch.GetName())
48 listOfBranches=tree.GetListOfBranches()
50 msg.debug(
'Checking %s branches ...', listOfBranches.GetEntries())
52 for branch
in listOfBranches:
54 msg.warning(
'Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
62 nEntries=tree.GetEntries()
64 msg.debug(
'Checking %s entries ...', nEntries)
66 for i
in range(nEntries):
67 if tree.GetEntry(i)<0:
68 msg.warning(
'Event %s of tree %s is corrupted.', i, tree.GetName())
72 if (i%printInterval)==0
and i>0:
73 msg.info(
'Validated %s events so far ...', i)
80 reader=RNTupleReader.Open(ntuple)
81 except Exception
as err:
82 msg.warning(
'Could not open ntuple %s: %s', ntuple, err)
85 msg.debug(
'Checking %s entries ...', reader.GetNEntries())
88 entry = reader.CreateEntry()
89 except AttributeError:
90 entry = reader.GetModel().CreateEntry()
93 reader.LoadEntry(i, entry)
94 except Exception
as err:
95 msg.warning(
'Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
99 if (i%printInterval)==0
and i>0:
100 msg.info(
'Validated %s events so far ...', i)
105 """Bulk read each top level field cluster by cluster.
107 from array
import array
109 from ROOT
import RException
114 reader=RNTupleReader.Open(ntuple)
115 except Exception
as err:
116 msg.warning(
'Could not open ntuple %r: %r', ntuple, err)
120 descriptor = reader.GetDescriptor()
121 msg.debug(f
"ntupleName={descriptor.GetName()}")
123 model = reader.GetModel()
125 fieldZero = model.GetFieldZero()
126 except AttributeError:
128 fieldZero = model.GetConstFieldZero()
130 subFields = fieldZero.GetSubFields()
131 except AttributeError:
132 subFields = fieldZero.GetConstSubfields()
133 msg.debug(f
"Top level fields number {subFields.size()}")
134 for field
in subFields:
135 msg.debug(f
"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
136 bulk = model.CreateBulk(field.GetFieldName())
138 for clusterDescriptor
in descriptor.GetClusterIterable():
140 clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
141 except AttributeError:
143 clusterIndex = ROOT.RNTupleLocalIndex(clusterDescriptor.GetId(), 0)
144 size =
int(clusterDescriptor.GetNEntries())
145 maskReq =
array(
'b', (
True for i
in range(size)))
146 msg.debug(f
" cluster #{clusterIndex.GetClusterId()}"
147 f
" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
149 values = bulk.ReadBulk(clusterIndex, maskReq, size)
150 msg.debug(f
" values array at {values}")
152 except RException
as err:
153 from traceback
import format_exception
154 msg.error(
"Exception reading ntuple %r\n%s", ntuple,
"".
join(format_exception(err)))
161 from PyUtils
import PoolFile
165 msg.debug(
'Checking directory %s ...', directory.GetName())
167 listOfKeys=directory.GetListOfKeys()
169 msg.debug(
'Checking %s keys ... ', listOfKeys.GetEntries())
171 for key
in listOfKeys:
173 msg.debug(
'Looking at key %s ...', key.GetName())
174 msg.debug(
'Key is of class %s.', key.GetClassName())
176 the_object=directory.Get(key.GetName())
178 msg.warning(
"Can't get object of key %s.", key.GetName())
181 if requireTree
and not isinstance(the_object, TTree):
182 msg.warning(
"Object of key %s is not of class TTree!", key.GetName())
185 if isinstance(the_object,TTree):
187 msg.debug(
'Checking tree %s ...', the_object.GetName())
190 if PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
191 nentries = the_object.GetEntries()
192 msg.debug(f
' contains {nentries} events')
193 elif PoolFile.PoolOpts.TTreeNames.MetaData == the_object.GetName():
195 msg.debug(
' contains MetaData')
197 if the_type==
'event':
200 elif the_type==
'basket':
204 msg.debug(
'Tree %s looks ok.', the_object.GetName())
208 msg.debug(
'Checking ntuple of key %s ...', key.GetName())
211 reader=RNTupleReader.Open(the_object)
212 except Exception
as err:
213 msg.warning(
'Could not open ntuple %s: %s', the_object, err)
217 if PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
218 nentries = reader.GetNEntries()
219 msg.debug(f
' contains {nentries} events')
220 elif PoolFile.PoolOpts.RNTupleNames.MetaData == reader.GetDescriptor().GetName():
222 msg.debug(
' contains MetaData')
224 if the_type==
'event':
227 elif the_type==
'basket':
231 msg.debug(
'NTuple of key %s looks ok.', key.GetName())
233 if isinstance(the_object, TDirectory):
234 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
238 if depth == 0
and hasMetadata
and checkNEvents(directory.GetName(), nentries)==1:
241 msg.debug(
'Directory %s looks ok.', directory.GetName())
247 msg.info(
'Checking file %s ...', fileName)
250 if not ROOT.ROOT.IsImplicitMTEnabled()
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
251 if (nThreads :=
int(os.environ[
'ATHENA_CORE_NUMBER'])) >= 0:
252 msg.info(f
"Setting the number of implicit ROOT threads to {nThreads}")
253 ROOT.ROOT.EnableImplicitMT(nThreads)
256 msg.warning(f
"Ignored negative ATHENA_CORE_NUMBER ({nThreads})")
258 file_handle=TFile.Open(fileName)
261 msg.warning(
"Can't access file %s.", fileName)
264 if not file_handle.IsOpen():
265 msg.warning(
"Can't open file %s.", fileName)
268 if file_handle.IsZombie():
269 msg.warning(
"File %s is a zombie.", fileName)
273 if file_handle.TestBit(TFile.kRecovered):
274 msg.warning(
"File %s needed to be recovered.", fileName)
279 msg.warning(
"File %s is corrupted.", fileName)
284 msg.info(
"File %s looks ok.", fileName)
287 ROOT.ROOT.DisableImplicitMT()
293 """Check consistency of number of events in file with metadata.
295 fileName name of file to check consistency of
296 nEntries number of events in fileName (e.g., obtained by examining event data object)
297 return 0 in case of consistency, 1 otherwise
299 from PyUtils.MetaReader
import read_metadata
301 msg.debug(
'Checking number of events in file %s ...', fileName)
304 msg.debug(
' according to metadata: {0}'.
format(meta[
"nentries"]))
305 msg.debug(
' according to event data: {0}'.
format(nEntries))
306 if meta[
"nentries"]
and nEntries
and meta[
"nentries"] != nEntries \
307 or meta[
"nentries"]
and not nEntries \
308 or not meta[
"nentries"]
and nEntries:
309 msg.warning(f
' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
312 msg.debug(
" looks ok.")
316 print(
"Usage: validate filename type requireTree verbosity")
317 print(
"'type' must be either 'event' or 'basket'")
318 print(
"'requireTree' must be either 'true' or 'false'")
319 print(
"'verbosity' must be either 'on' or 'off'")
339 if the_type!=
"event" and the_type!=
"basket":
342 if requireTree==
"true":
344 elif requireTree==
"false":
350 msg.setLevel(logging.DEBUG)
351 elif verbosity==
"off":
352 msg.setLevel(logging.INFO)
356 rc=
checkFile(fileName,the_type, requireTree)
357 msg.debug(
'Returning %s', rc)
365 if __name__ ==
'__main__':
367 ch=logging.StreamHandler(sys.stdout)
368 formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
369 ch.setFormatter(formatter)