14 from PyUtils
import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT
import TFile, TTree, TDirectory, TStopwatch
18 from PyUtils.PoolFile
import isRNTuple
20 msg = logging.getLogger(__name__)
24 msg.debug(
'Checking branch %s ...', branch.GetName())
26 nBaskets=branch.GetWriteBasket()
28 msg.debug(
'Checking %s baskets ...', nBaskets)
30 for iBasket
in range(nBaskets):
31 basket=branch.GetBasket(iBasket)
33 msg.warning(
'Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
36 listOfSubBranches=branch.GetListOfBranches()
37 msg.debug(
'Checking %s subbranches ...', listOfSubBranches.GetEntries())
38 for subBranch
in listOfSubBranches:
42 msg.debug(
'Branch %s looks ok.', branch.GetName())
48 listOfBranches=tree.GetListOfBranches()
50 msg.debug(
'Checking %s branches ...', listOfBranches.GetEntries())
52 for branch
in listOfBranches:
54 msg.warning(
'Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
62 nEntries=tree.GetEntries()
64 msg.debug(
'Checking %s entries ...', nEntries)
66 for i
in range(nEntries):
67 if tree.GetEntry(i)<0:
68 msg.warning(
'Event %s of tree %s is corrupted.', i, tree.GetName())
72 if (i%printInterval)==0
and i>0:
73 msg.info(
'Validated %s events so far ...', i)
80 reader=RNTupleReader.Open(ntuple)
81 except Exception
as err:
82 msg.warning(
'Could not open ntuple %s: %s', ntuple, err)
85 msg.debug(
'Checking %s entries ...', reader.GetNEntries())
90 entry = reader.CreateEntry()
91 except AttributeError:
92 entry = reader.GetModel().CreateEntry()
93 reader.LoadEntry(i, entry)
94 except Exception
as err:
95 msg.warning(
'Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
99 if (i%printInterval)==0
and i>0:
100 msg.info(
'Validated %s events so far ...', i)
105 """Bulk read each top level field cluster by cluster.
107 from array
import array
109 from ROOT
import RException
114 reader=RNTupleReader.Open(ntuple)
115 except Exception
as err:
116 msg.warning(
'Could not open ntuple %r: %r', ntuple, err)
120 descriptor = reader.GetDescriptor()
121 msg.debug(f
"ntupleName={descriptor.GetName()}")
123 model = reader.GetModel()
124 fieldZero = model.GetFieldZero()
125 subFields = fieldZero.GetSubFields()
126 msg.debug(f
"Top level fields number {subFields.size()}")
127 for field
in subFields:
128 msg.debug(f
"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
129 bulk = model.CreateBulk(field.GetFieldName())
131 for clusterDescriptor
in descriptor.GetClusterIterable():
132 clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
133 size =
int(clusterDescriptor.GetNEntries())
134 maskReq =
array(
'b', (
True for i
in range(size)))
135 msg.debug(f
" cluster #{clusterIndex.GetClusterId()}"
136 f
" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
138 values = bulk.ReadBulk(clusterIndex, maskReq, size)
139 msg.debug(f
" values array at {values}")
141 except RException
as err:
142 from traceback
import format_exception
143 msg.error(
"Exception reading ntuple %r\n%s", ntuple,
"".
join(format_exception(err)))
150 from PyUtils
import PoolFile
154 msg.debug(
'Checking directory %s ...', directory.GetName())
156 listOfKeys=directory.GetListOfKeys()
158 msg.debug(
'Checking %s keys ... ', listOfKeys.GetEntries())
160 for key
in listOfKeys:
162 msg.debug(
'Looking at key %s ...', key.GetName())
163 msg.debug(
'Key is of class %s.', key.GetClassName())
165 the_object=directory.Get(key.GetName())
167 msg.warning(
"Can't get object of key %s.", key.GetName())
170 if requireTree
and not isinstance(the_object, TTree):
171 msg.warning(
"Object of key %s is not of class TTree!", key.GetName())
174 if isinstance(the_object,TTree):
176 msg.debug(
'Checking tree %s ...', the_object.GetName())
179 if PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
180 nentries = the_object.GetEntries()
181 msg.debug(f
' contains {nentries} events')
182 elif PoolFile.PoolOpts.TTreeNames.MetaData == the_object.GetName():
184 msg.debug(
' contains MetaData')
186 if the_type==
'event':
189 elif the_type==
'basket':
193 msg.debug(
'Tree %s looks ok.', the_object.GetName())
197 msg.debug(
'Checking ntuple of key %s ...', key.GetName())
200 reader=RNTupleReader.Open(the_object)
201 except Exception
as err:
202 msg.warning(
'Could not open ntuple %s: %s', the_object, err)
206 if PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
207 nentries = reader.GetNEntries()
208 msg.debug(f
' contains {nentries} events')
209 elif PoolFile.PoolOpts.RNTupleNames.MetaData == reader.GetDescriptor().GetName():
211 msg.debug(
' contains MetaData')
213 if the_type==
'event':
216 elif the_type==
'basket':
220 msg.debug(
'NTuple of key %s looks ok.', key.GetName())
222 if isinstance(the_object, TDirectory):
223 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
227 if depth == 0
and hasMetadata
and checkNEvents(directory.GetName(), nentries)==1:
230 msg.debug(
'Directory %s looks ok.', directory.GetName())
236 msg.info(
'Checking file %s ...', fileName)
238 isIMTEnabled = ROOT.ROOT.IsImplicitMTEnabled()
239 if not isIMTEnabled
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
240 nThreads =
int(os.environ[
'ATHENA_CORE_NUMBER'])
241 msg.info(f
"Setting the number of implicit ROOT threads to {nThreads}")
242 ROOT.ROOT.EnableImplicitMT(nThreads)
244 file_handle=TFile.Open(fileName)
247 msg.warning(
"Can't access file %s.", fileName)
250 if not file_handle.IsOpen():
251 msg.warning(
"Can't open file %s.", fileName)
254 if file_handle.IsZombie():
255 msg.warning(
"File %s is a zombie.", fileName)
259 if file_handle.TestBit(TFile.kRecovered):
260 msg.warning(
"File %s needed to be recovered.", fileName)
265 msg.warning(
"File %s is corrupted.", fileName)
270 msg.info(
"File %s looks ok.", fileName)
272 if not isIMTEnabled
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
273 ROOT.ROOT.DisableImplicitMT()
279 """Check consistency of number of events in file with metadata.
281 fileName name of file to check consistency of
282 nEntries number of events in fileName (e.g., obtained by examining event data object)
283 return 0 in case of consistency, 1 otherwise
285 from PyUtils.MetaReader
import read_metadata
287 msg.debug(
'Checking number of events in file %s ...', fileName)
290 msg.debug(
' according to metadata: {0}'.
format(meta[
"nentries"]))
291 msg.debug(
' according to event data: {0}'.
format(nEntries))
292 if meta[
"nentries"]
and nEntries
and meta[
"nentries"] != nEntries \
293 or meta[
"nentries"]
and not nEntries \
294 or not meta[
"nentries"]
and nEntries:
295 msg.warning(f
' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
298 msg.debug(
" looks ok.")
302 print(
"Usage: validate filename type requireTree verbosity")
303 print(
"'type' must be either 'event' or 'basket'")
304 print(
"'requireTree' must be either 'true' or 'false'")
305 print(
"'verbosity' must be either 'on' or 'off'")
325 if the_type!=
"event" and the_type!=
"basket":
328 if requireTree==
"true":
330 elif requireTree==
"false":
336 msg.setLevel(logging.DEBUG)
337 elif verbosity==
"off":
338 msg.setLevel(logging.INFO)
342 rc=
checkFile(fileName,the_type, requireTree)
343 msg.debug(
'Returning %s', rc)
351 if __name__ ==
'__main__':
353 ch=logging.StreamHandler(sys.stdout)
354 formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
355 ch.setFormatter(formatter)