14 from PyUtils
import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT
import TFile, TTree, TDirectory, TStopwatch
18 from ROOT
import RNTupleReader
20 from ROOT.Experimental
import RNTupleReader
21 from PyUtils.PoolFile
import isRNTuple
23 msg = logging.getLogger(__name__)
# Fragment of a per-branch integrity check: it reads the branch's write-basket
# count, fetches every basket by index, then iterates over the sub-branches.
# NOTE(review): this listing is incomplete -- the enclosing `def`, the test
# guarding the "corrupted" warning, the body of the sub-branch loop and any
# return statements are not visible here; confirm against the original file.
27 msg.debug(
'Checking branch %s ...', branch.GetName())
# The value returned by GetWriteBasket() bounds the basket loop below.
29 nBaskets=branch.GetWriteBasket()
31 msg.debug(
'Checking %s baskets ...', nBaskets)
33 for iBasket
in range(nBaskets):
34 basket=branch.GetBasket(iBasket)
36 msg.warning(
'Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
# Sub-branches are visited after the baskets of this branch.
39 listOfSubBranches=branch.GetListOfBranches()
40 msg.debug(
'Checking %s subbranches ...', listOfSubBranches.GetEntries())
41 for subBranch
in listOfSubBranches:
45 msg.debug(
'Branch %s looks ok.', branch.GetName())
# Fragment of a tree check that walks every branch returned by
# tree.GetListOfBranches() and reports the tree as corrupted, naming the
# offending branch.
# NOTE(review): the enclosing `def`, the per-branch test whose failure
# triggers the warning, and any return statements are not visible in this
# listing; confirm against the original file.
51 listOfBranches=tree.GetListOfBranches()
53 msg.debug(
'Checking %s branches ...', listOfBranches.GetEntries())
55 for branch
in listOfBranches:
57 msg.warning(
'Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
# Fragment of a tree check that reads every entry in index order.
# A negative result from tree.GetEntry(i) is reported as a corrupted event.
# NOTE(review): `printInterval` is defined outside the visible lines, and the
# enclosing `def` and any return statements are missing from this listing.
65 nEntries=tree.GetEntries()
67 msg.debug(
'Checking %s entries ...', nEntries)
69 for i
in range(nEntries):
70 if tree.GetEntry(i)<0:
71 msg.warning(
'Event %s of tree %s is corrupted.', i, tree.GetName())
# Progress message on every non-zero multiple of printInterval.
75 if (i%printInterval)==0
and i>0:
76 msg.info(
'Validated %s events so far ...', i)
# Fragment of an RNTuple check that opens a reader on `ntuple` and loads
# entries one at a time; an exception from Open() or LoadEntry() is logged
# as a warning.
# NOTE(review): the `try:` lines, the loop header that defines `i`, the
# enclosing `def` and any return statements are not visible in this listing.
83 reader=RNTupleReader.Open(ntuple)
84 except Exception
as err:
85 msg.warning(
'Could not open ntuple %s: %s', ntuple, err)
88 msg.debug(
'Checking %s entries ...', reader.GetNEntries())
# The entry is created from the reader; on AttributeError it is created from
# the reader's model instead (presumably two spellings of the same API in
# different ROOT releases -- confirm).
91 entry = reader.CreateEntry()
92 except AttributeError:
93 entry = reader.GetModel().CreateEntry()
96 reader.LoadEntry(i, entry)
97 except Exception
as err:
98 msg.warning(
'Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
# Progress message on every non-zero multiple of printInterval.
102 if (i%printInterval)==0
and i>0:
103 msg.info(
'Validated %s events so far ...', i)
# Fragment of the bulk-read RNTuple check summarised by the docstring line
# below. For every top-level field a bulk reader is created with
# model.CreateBulk(<field name>); then, for each cluster yielded by
# descriptor.GetClusterIterable(), ReadBulk() is called with an index at
# offset 0 of that cluster, an all-true request mask (array type 'b') of the
# cluster's entry count, and that count. An RException is logged as an error
# together with its formatted traceback.
# The paired statements separated by `except AttributeError`
# (GetFieldZero/GetConstFieldZero, GetSubFields/GetConstSubfields,
# ROOT.Experimental.RClusterIndex/ROOT.RNTupleLocalIndex) and the duplicated
# imports from ROOT and ROOT.Experimental look like fallbacks between API
# spellings -- presumably for different ROOT releases; confirm.
# NOTE(review): this listing is incomplete -- the enclosing `def`, the `try:`
# lines, the closing quotes of the docstring and any return statements are
# not visible; confirm against the original file.
108 """Bulk read each top level field cluster by cluster.
110 from array
import array
112 from ROOT
import RException
114 from ROOT.Experimental
import RException
117 reader=RNTupleReader.Open(ntuple)
118 except Exception
as err:
119 msg.warning(
'Could not open ntuple %r: %r', ntuple, err)
123 descriptor = reader.GetDescriptor()
124 msg.debug(f
"ntupleName={descriptor.GetName()}")
126 model = reader.GetModel()
128 fieldZero = model.GetFieldZero()
129 except AttributeError:
131 fieldZero = model.GetConstFieldZero()
133 subFields = fieldZero.GetSubFields()
134 except AttributeError:
135 subFields = fieldZero.GetConstSubfields()
136 msg.debug(f
"Top level fields number {subFields.size()}")
137 for field
in subFields:
138 msg.debug(f
"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
139 bulk = model.CreateBulk(field.GetFieldName())
141 for clusterDescriptor
in descriptor.GetClusterIterable():
143 clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
144 except AttributeError:
146 clusterIndex = ROOT.RNTupleLocalIndex(clusterDescriptor.GetId(), 0)
147 size =
int(clusterDescriptor.GetNEntries())
148 maskReq =
array(
'b', (
True for i
in range(size)))
149 msg.debug(f
" cluster #{clusterIndex.GetClusterId()}"
150 f
" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
152 values = bulk.ReadBulk(clusterIndex, maskReq, size)
153 msg.debug(f
" values array at {values}")
155 except RException
as err:
156 from traceback
import format_exception
157 msg.error(
"Exception reading ntuple %r\n%s", ntuple,
"".
join(format_exception(err)))
# Fragment of the directory walk; it recurses through checkDirectory() for
# every object that is a TDirectory. Each key's object is fetched with
# directory.Get(key.GetName()); TTree objects and RNTuple readers are then
# handled according to `the_type` ('event' or 'basket').
# NOTE(review): this listing is incomplete -- the enclosing `def`, the
# statements executed inside the 'event'/'basket' branches, the test that
# selects the RNTuple path, the assignment of `hasMetadata`, the `try:` line
# and all return statements are not visible; confirm against the original.
164 from PyUtils
import PoolFile
168 msg.debug(
'Checking directory %s ...', directory.GetName())
170 listOfKeys=directory.GetListOfKeys()
172 msg.debug(
'Checking %s keys ... ', listOfKeys.GetEntries())
174 for key
in listOfKeys:
176 msg.debug(
'Looking at key %s ...', key.GetName())
177 msg.debug(
'Key is of class %s.', key.GetClassName())
179 the_object=directory.Get(key.GetName())
181 msg.warning(
"Can't get object of key %s.", key.GetName())
# With requireTree set, any object that is not a TTree is reported.
184 if requireTree
and not isinstance(the_object, TTree):
185 msg.warning(
"Object of key %s is not of class TTree!", key.GetName())
188 if isinstance(the_object,TTree):
190 msg.debug(
'Checking tree %s ...', the_object.GetName())
# The tree name is compared with the event-data and metadata tree names held
# in PoolFile.PoolOpts.TTreeNames; the event-data tree supplies `nentries`.
193 if PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
194 nentries = the_object.GetEntries()
195 msg.debug(f
' contains {nentries} events')
196 elif PoolFile.PoolOpts.TTreeNames.MetaData == the_object.GetName():
198 msg.debug(
' contains MetaData')
200 if the_type==
'event':
203 elif the_type==
'basket':
207 msg.debug(
'Tree %s looks ok.', the_object.GetName())
211 msg.debug(
'Checking ntuple of key %s ...', key.GetName())
214 reader=RNTupleReader.Open(the_object)
215 except Exception
as err:
216 msg.warning(
'Could not open ntuple %s: %s', the_object, err)
# Same name comparison as for trees, using PoolFile.PoolOpts.RNTupleNames and
# the name reported by the reader's descriptor.
220 if PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
221 nentries = reader.GetNEntries()
222 msg.debug(f
' contains {nentries} events')
223 elif PoolFile.PoolOpts.RNTupleNames.MetaData == reader.GetDescriptor().GetName():
225 msg.debug(
' contains MetaData')
227 if the_type==
'event':
230 elif the_type==
'basket':
234 msg.debug(
'NTuple of key %s looks ok.', key.GetName())
# Nested directories are checked recursively with depth increased by one.
236 if isinstance(the_object, TDirectory):
237 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
# Only at depth 0, and only when hasMetadata is set, the event count is
# cross-checked via checkNEvents(directory name, nentries).
241 if depth == 0
and hasMetadata
and checkNEvents(directory.GetName(), nentries)==1:
244 msg.debug(
'Directory %s looks ok.', directory.GetName())
# Fragment of the file-level check (called elsewhere in this listing as
# checkFile(fileName, the_type, requireTree)). It opens the file with
# TFile.Open() and logs warnings for a file that is not open, is a zombie or
# has the TFile.kRecovered bit set.
# NOTE(review): this listing is incomplete -- the enclosing `def`, the tests
# guarding the "Can't access" and "is corrupted" warnings, the `else:` line
# before the negative-value warning, and all return statements are not
# visible; confirm against the original file.
250 msg.info(
'Checking file %s ...', fileName)
# Implicit multithreading is only touched when it is not already enabled and
# both TRF_MULTITHREADED_VALIDATION and ATHENA_CORE_NUMBER are present in the
# environment; the thread count is int(ATHENA_CORE_NUMBER) when that is >= 0.
253 if not ROOT.ROOT.IsImplicitMTEnabled()
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
254 if (nThreads :=
int(os.environ[
'ATHENA_CORE_NUMBER'])) >= 0:
255 msg.info(f
"Setting the number of implicit ROOT threads to {nThreads}")
256 ROOT.ROOT.EnableImplicitMT(nThreads)
259 msg.warning(f
"Ignored negative ATHENA_CORE_NUMBER ({nThreads})")
261 file_handle=TFile.Open(fileName)
264 msg.warning(
"Can't access file %s.", fileName)
267 if not file_handle.IsOpen():
268 msg.warning(
"Can't open file %s.", fileName)
271 if file_handle.IsZombie():
272 msg.warning(
"File %s is a zombie.", fileName)
276 if file_handle.TestBit(TFile.kRecovered):
277 msg.warning(
"File %s needed to be recovered.", fileName)
282 msg.warning(
"File %s is corrupted.", fileName)
287 msg.info(
"File %s looks ok.", fileName)
# NOTE(review): whether this call is conditional is not visible here.
290 ROOT.ROOT.DisableImplicitMT()
# Fragment of the event-count consistency check (called elsewhere in this
# listing as checkNEvents(directory.GetName(), nentries)). The warning fires
# when meta["nentries"] and nEntries are both truthy but differ, or when
# exactly one of the two is truthy; otherwise " looks ok." is logged at debug
# level.
# NOTE(review): this listing is incomplete -- the enclosing `def`, the
# closing quotes of the docstring, the assignment of `meta` (read_metadata is
# imported but its call is not shown), the `else:` line and the return
# statements are not visible; confirm against the original file.
296 """Check consistency of number of events in file with metadata.
298 fileName name of file to check consistency of
299 nEntries number of events in fileName (e.g., obtained by examining event data object)
300 return 0 in case of consistency, 1 otherwise
302 from PyUtils.MetaReader
import read_metadata
304 msg.debug(
'Checking number of events in file %s ...', fileName)
307 msg.debug(
' according to metadata: {0}'.
format(meta[
"nentries"]))
308 msg.debug(
' according to event data: {0}'.
format(nEntries))
309 if meta[
"nentries"]
and nEntries
and meta[
"nentries"] != nEntries \
310 or meta[
"nentries"]
and not nEntries \
311 or not meta[
"nentries"]
and nEntries:
312 msg.warning(f
' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
315 msg.debug(
" looks ok.")
# Prints the command-line usage text: four positional arguments (filename,
# type, requireTree, verbosity) and the values accepted for the last three.
# NOTE(review): the enclosing `def` and any return statement are not visible
# in this listing.
319 print(
"Usage: validate filename type requireTree verbosity")
320 print(
"'type' must be either 'event' or 'basket'")
321 print(
"'requireTree' must be either 'true' or 'false'")
322 print(
"'verbosity' must be either 'on' or 'off'")
# Fragment of the command-line handling: `the_type` must be "event" or
# "basket", `requireTree` is matched against "true"/"false", the logger level
# is set to DEBUG or INFO depending on `verbosity`, and checkFile() is run
# with the result logged at debug level.
# NOTE(review): this listing is incomplete -- the enclosing `def`, the
# argument unpacking, the bodies of the type/requireTree branches, the `if`
# line that precedes the DEBUG assignment and the return statements are not
# visible; confirm against the original file.
342 if the_type!=
"event" and the_type!=
"basket":
345 if requireTree==
"true":
347 elif requireTree==
"false":
353 msg.setLevel(logging.DEBUG)
354 elif verbosity==
"off":
355 msg.setLevel(logging.INFO)
359 rc=
checkFile(fileName,the_type, requireTree)
360 msg.debug(
'Returning %s', rc)
# Script entry guard: builds a logging.StreamHandler that writes to
# sys.stdout and gives it a timestamp/name/level/message formatter.
# NOTE(review): the statements that attach the handler to a logger and invoke
# the entry point are not visible in this listing.
368 if __name__ ==
'__main__':
370 ch=logging.StreamHandler(sys.stdout)
371 formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
372 ch.setFormatter(formatter)