14 from PyUtils
import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT
import TFile, TTree, TDirectory, TStopwatch
18 from PyUtils.PoolFile
import isRNTuple
20 msg = logging.getLogger(__name__)
24 msg.debug(
'Checking branch %s ...', branch.GetName())
26 nBaskets=branch.GetWriteBasket()
28 msg.debug(
'Checking %s baskets ...', nBaskets)
30 for iBasket
in range(nBaskets):
31 basket=branch.GetBasket(iBasket)
33 msg.warning(
'Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
36 listOfSubBranches=branch.GetListOfBranches()
37 msg.debug(
'Checking %s subbranches ...', listOfSubBranches.GetEntries())
38 for subBranch
in listOfSubBranches:
42 msg.debug(
'Branch %s looks ok.', branch.GetName())
48 listOfBranches=tree.GetListOfBranches()
50 msg.debug(
'Checking %s branches ...', listOfBranches.GetEntries())
52 for branch
in listOfBranches:
54 msg.warning(
'Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
62 nEntries=tree.GetEntries()
64 msg.debug(
'Checking %s entries ...', nEntries)
66 for i
in range(nEntries):
67 if tree.GetEntry(i)<0:
68 msg.warning(
'Event %s of tree %s is corrupted.', i, tree.GetName())
72 if (i%printInterval)==0
and i>0:
73 msg.info(
'Validated %s events so far ...', i)
80 reader=RNTupleReader.Open(ntuple)
81 except Exception
as err:
82 msg.warning(
'Could not open ntuple %s: %s', ntuple, err)
85 msg.debug(
'Checking %s entries ...', reader.GetNEntries())
88 entry = reader.CreateEntry()
89 except AttributeError:
90 entry = reader.GetModel().CreateEntry()
93 reader.LoadEntry(i, entry)
94 except Exception
as err:
95 msg.warning(
'Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
99 if (i%printInterval)==0
and i>0:
100 msg.info(
'Validated %s events so far ...', i)
105 """Bulk read each top level field cluster by cluster.
107 from array
import array
109 from ROOT
import RException
114 reader=RNTupleReader.Open(ntuple)
115 except Exception
as err:
116 msg.warning(
'Could not open ntuple %r: %r', ntuple, err)
120 descriptor = reader.GetDescriptor()
121 msg.debug(f
"ntupleName={descriptor.GetName()}")
123 model = reader.GetModel()
125 fieldZero = model.GetFieldZero()
126 except AttributeError:
128 fieldZero = model.GetConstFieldZero()
129 subFields = fieldZero.GetSubFields()
130 msg.debug(f
"Top level fields number {subFields.size()}")
131 for field
in subFields:
132 msg.debug(f
"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
133 bulk = model.CreateBulk(field.GetFieldName())
135 for clusterDescriptor
in descriptor.GetClusterIterable():
137 clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
138 except AttributeError:
140 clusterIndex = ROOT.RNTupleLocalIndex(clusterDescriptor.GetId(), 0)
141 size =
int(clusterDescriptor.GetNEntries())
142 maskReq =
array(
'b', (
True for i
in range(size)))
143 msg.debug(f
" cluster #{clusterIndex.GetClusterId()}"
144 f
" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
146 values = bulk.ReadBulk(clusterIndex, maskReq, size)
147 msg.debug(f
" values array at {values}")
149 except RException
as err:
150 from traceback
import format_exception
151 msg.error(
"Exception reading ntuple %r\n%s", ntuple,
"".
join(format_exception(err)))
158 from PyUtils
import PoolFile
162 msg.debug(
'Checking directory %s ...', directory.GetName())
164 listOfKeys=directory.GetListOfKeys()
166 msg.debug(
'Checking %s keys ... ', listOfKeys.GetEntries())
168 for key
in listOfKeys:
170 msg.debug(
'Looking at key %s ...', key.GetName())
171 msg.debug(
'Key is of class %s.', key.GetClassName())
173 the_object=directory.Get(key.GetName())
175 msg.warning(
"Can't get object of key %s.", key.GetName())
178 if requireTree
and not isinstance(the_object, TTree):
179 msg.warning(
"Object of key %s is not of class TTree!", key.GetName())
182 if isinstance(the_object,TTree):
184 msg.debug(
'Checking tree %s ...', the_object.GetName())
187 if PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
188 nentries = the_object.GetEntries()
189 msg.debug(f
' contains {nentries} events')
190 elif PoolFile.PoolOpts.TTreeNames.MetaData == the_object.GetName():
192 msg.debug(
' contains MetaData')
194 if the_type==
'event':
197 elif the_type==
'basket':
201 msg.debug(
'Tree %s looks ok.', the_object.GetName())
205 msg.debug(
'Checking ntuple of key %s ...', key.GetName())
208 reader=RNTupleReader.Open(the_object)
209 except Exception
as err:
210 msg.warning(
'Could not open ntuple %s: %s', the_object, err)
214 if PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
215 nentries = reader.GetNEntries()
216 msg.debug(f
' contains {nentries} events')
217 elif PoolFile.PoolOpts.RNTupleNames.MetaData == reader.GetDescriptor().GetName():
219 msg.debug(
' contains MetaData')
221 if the_type==
'event':
224 elif the_type==
'basket':
228 msg.debug(
'NTuple of key %s looks ok.', key.GetName())
230 if isinstance(the_object, TDirectory):
231 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
235 if depth == 0
and hasMetadata
and checkNEvents(directory.GetName(), nentries)==1:
238 msg.debug(
'Directory %s looks ok.', directory.GetName())
244 msg.info(
'Checking file %s ...', fileName)
247 if not ROOT.ROOT.IsImplicitMTEnabled()
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
248 if (nThreads :=
int(os.environ[
'ATHENA_CORE_NUMBER'])) >= 0:
249 msg.info(f
"Setting the number of implicit ROOT threads to {nThreads}")
250 ROOT.ROOT.EnableImplicitMT(nThreads)
253 msg.warning(f
"Ignored negative ATHENA_CORE_NUMBER ({nThreads})")
255 file_handle=TFile.Open(fileName)
258 msg.warning(
"Can't access file %s.", fileName)
261 if not file_handle.IsOpen():
262 msg.warning(
"Can't open file %s.", fileName)
265 if file_handle.IsZombie():
266 msg.warning(
"File %s is a zombie.", fileName)
270 if file_handle.TestBit(TFile.kRecovered):
271 msg.warning(
"File %s needed to be recovered.", fileName)
276 msg.warning(
"File %s is corrupted.", fileName)
281 msg.info(
"File %s looks ok.", fileName)
284 ROOT.ROOT.DisableImplicitMT()
290 """Check consistency of number of events in file with metadata.
292 fileName name of file to check consistency of
293 nEntries number of events in fileName (e.g., obtained by examining event data object)
294 return 0 in case of consistency, 1 otherwise
296 from PyUtils.MetaReader
import read_metadata
298 msg.debug(
'Checking number of events in file %s ...', fileName)
301 msg.debug(
' according to metadata: {0}'.
format(meta[
"nentries"]))
302 msg.debug(
' according to event data: {0}'.
format(nEntries))
303 if meta[
"nentries"]
and nEntries
and meta[
"nentries"] != nEntries \
304 or meta[
"nentries"]
and not nEntries \
305 or not meta[
"nentries"]
and nEntries:
306 msg.warning(f
' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
309 msg.debug(
" looks ok.")
313 print(
"Usage: validate filename type requireTree verbosity")
314 print(
"'type' must be either 'event' or 'basket'")
315 print(
"'requireTree' must be either 'true' or 'false'")
316 print(
"'verbosity' must be either 'on' or 'off'")
336 if the_type!=
"event" and the_type!=
"basket":
339 if requireTree==
"true":
341 elif requireTree==
"false":
347 msg.setLevel(logging.DEBUG)
348 elif verbosity==
"off":
349 msg.setLevel(logging.INFO)
353 rc=
checkFile(fileName,the_type, requireTree)
354 msg.debug(
'Returning %s', rc)
362 if __name__ ==
'__main__':
364 ch=logging.StreamHandler(sys.stdout)
365 formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
366 ch.setFormatter(formatter)