14 from PyUtils
import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT
import TFile, TTree, TDirectory, TStopwatch
19 msg = logging.getLogger(__name__)
23 msg.debug(
'Checking branch %s ...', branch.GetName())
25 nBaskets=branch.GetWriteBasket()
27 msg.debug(
'Checking %s baskets ...', nBaskets)
29 for iBasket
in range(nBaskets):
30 basket=branch.GetBasket(iBasket)
32 msg.warning(
'Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
35 listOfSubBranches=branch.GetListOfBranches()
36 msg.debug(
'Checking %s subbranches ...', listOfSubBranches.GetEntries())
37 for subBranch
in listOfSubBranches:
41 msg.debug(
'Branch %s looks ok.', branch.GetName())
47 listOfBranches=tree.GetListOfBranches()
49 msg.debug(
'Checking %s branches ...', listOfBranches.GetEntries())
51 for branch
in listOfBranches:
53 msg.warning(
'Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
61 nEntries=tree.GetEntries()
63 msg.debug(
'Checking %s entries ...', nEntries)
65 for i
in range(nEntries):
66 if tree.GetEntry(i)<0:
67 msg.warning(
'Event %s of tree %s is corrupted.', i, tree.GetName())
71 if (i%printInterval)==0
and i>0:
72 msg.info(
'Validated %s events so far ...', i)
79 reader=RNTupleReader.Open(ntuple)
80 except Exception
as err:
81 msg.warning(
'Could not open ntuple %s: %s', ntuple, err)
84 msg.debug(
'Checking %s entries ...', reader.GetNEntries())
89 except Exception
as err:
90 msg.warning(
'Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
94 if (i%printInterval)==0
and i>0:
95 msg.info(
'Validated %s events so far ...', i)
100 """Bulk read each top level field cluster by cluster.
102 from array
import array
105 reader=RNTupleReader.Open(ntuple)
106 except Exception
as err:
107 msg.warning(
'Could not open ntuple %r: %r', ntuple, err)
111 descriptor = reader.GetDescriptor()
112 msg.debug(f
"ntupleName={descriptor.GetName()}")
114 model = reader.GetModel()
115 fieldZero = model.GetFieldZero()
116 subFields = fieldZero.GetSubFields()
117 msg.debug(f
"Top level fields number {subFields.size()}")
118 for field
in subFields:
119 msg.debug(f
"fieldName={field.GetFieldName()} typeName={field.GetTypeName()}")
120 bulk = model.CreateBulk(field.GetFieldName())
122 for clusterDescriptor
in descriptor.GetClusterIterable():
123 clusterIndex = ROOT.Experimental.RClusterIndex(clusterDescriptor.GetId(), 0)
124 size =
int(clusterDescriptor.GetNEntries())
125 maskReq =
array(
'b', (
True for i
in range(size)))
126 msg.debug(f
" cluster #{clusterIndex.GetClusterId()}"
127 f
" firstEntryIndex={clusterDescriptor.GetFirstEntryIndex()}"
129 values = bulk.ReadBulk(clusterIndex, maskReq, size)
130 msg.debug(f
" values array at {values}")
132 except ROOT.Experimental.RException
as err:
133 from traceback
import format_exception
134 msg.error(
"Exception reading ntuple %r\n%s", ntuple,
"".
join(format_exception(err)))
141 from PyUtils
import PoolFile
145 msg.debug(
'Checking directory %s ...', directory.GetName())
147 listOfKeys=directory.GetListOfKeys()
149 msg.debug(
'Checking %s keys ... ', listOfKeys.GetEntries())
151 for key
in listOfKeys:
153 msg.debug(
'Looking at key %s ...', key.GetName())
154 msg.debug(
'Key is of class %s.', key.GetClassName())
156 the_object=directory.Get(key.GetName())
158 msg.warning(
"Can't get object of key %s.", key.GetName())
161 if requireTree
and not isinstance(the_object, TTree):
162 msg.warning(
"Object of key %s is not of class TTree!", key.GetName())
165 if isinstance(the_object,TTree):
167 msg.debug(
'Checking tree %s ...', the_object.GetName())
170 if PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
171 nentries = the_object.GetEntries()
172 msg.debug(f
' contains {nentries} events')
173 elif PoolFile.PoolOpts.TTreeNames.MetaData == the_object.GetName():
175 msg.debug(
' contains MetaData')
177 if the_type==
'event':
180 elif the_type==
'basket':
184 msg.debug(
'Tree %s looks ok.', the_object.GetName())
186 if isinstance(the_object,RNTuple):
188 msg.debug(
'Checking ntuple of key %s ...', key.GetName())
191 reader=RNTupleReader.Open(the_object)
192 except Exception
as err:
193 msg.warning(
'Could not open ntuple %s: %s', the_object, err)
197 if PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
198 nentries = reader.GetNEntries()
199 msg.debug(f
' contains {nentries} events')
200 elif PoolFile.PoolOpts.RNTupleNames.MetaData == reader.GetDescriptor().GetName():
202 msg.debug(
' contains MetaData')
204 if the_type==
'event':
207 elif the_type==
'basket':
211 msg.debug(
'NTuple of key %s looks ok.', key.GetName())
213 if isinstance(the_object, TDirectory):
214 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
218 if depth == 0
and hasMetadata
and checkNEvents(directory.GetName(), nentries)==1:
221 msg.debug(
'Directory %s looks ok.', directory.GetName())
227 msg.info(
'Checking file %s ...', fileName)
229 isIMTEnabled = ROOT.ROOT.IsImplicitMTEnabled()
230 if not isIMTEnabled
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
231 nThreads =
int(os.environ[
'ATHENA_CORE_NUMBER'])
232 msg.info(f
"Setting the number of implicit ROOT threads to {nThreads}")
233 ROOT.ROOT.EnableImplicitMT(nThreads)
235 file_handle=TFile.Open(fileName)
238 msg.warning(
"Can't access file %s.", fileName)
241 if not file_handle.IsOpen():
242 msg.warning(
"Can't open file %s.", fileName)
245 if file_handle.IsZombie():
246 msg.warning(
"File %s is a zombie.", fileName)
250 if file_handle.TestBit(TFile.kRecovered):
251 msg.warning(
"File %s needed to be recovered.", fileName)
256 msg.warning(
"File %s is corrupted.", fileName)
261 msg.info(
"File %s looks ok.", fileName)
263 if not isIMTEnabled
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
264 ROOT.ROOT.DisableImplicitMT()
270 """Check consistency of number of events in file with metadata.
272 fileName name of file to check consistency of
273 nEntries number of events in fileName (e.g., obtained by examining event data object)
274 return 0 in case of consistency, 1 otherwise
276 from PyUtils.MetaReader
import read_metadata
278 msg.debug(
'Checking number of events in file %s ...', fileName)
281 msg.debug(
' according to metadata: {0}'.
format(meta[
"nentries"]))
282 msg.debug(
' according to event data: {0}'.
format(nEntries))
283 if meta[
"nentries"]
and nEntries
and meta[
"nentries"] != nEntries \
284 or meta[
"nentries"]
and not nEntries \
285 or not meta[
"nentries"]
and nEntries:
286 msg.warning(f
' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
289 msg.debug(
" looks ok.")
293 print(
"Usage: validate filename type requireTree verbosity")
294 print(
"'type' must be either 'event' or 'basket'")
295 print(
"'requireTree' must be either 'true' or 'false'")
296 print(
"'verbosity' must be either 'on' or 'off'")
316 if the_type!=
"event" and the_type!=
"basket":
319 if requireTree==
"true":
321 elif requireTree==
"false":
327 msg.setLevel(logging.DEBUG)
328 elif verbosity==
"off":
329 msg.setLevel(logging.INFO)
333 rc=
checkFile(fileName,the_type, requireTree)
334 msg.debug(
'Returning %s', rc)
342 if __name__ ==
'__main__':
344 ch=logging.StreamHandler(sys.stdout)
345 formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
346 ch.setFormatter(formatter)