14 from PyUtils
import RootUtils
15 ROOT = RootUtils.import_root()
16 from ROOT
import TFile, TTree, TDirectory, TStopwatch
19 msg = logging.getLogger(__name__)
23 msg.debug(
'Checking branch %s ...', branch.GetName())
25 nBaskets=branch.GetWriteBasket()
27 msg.debug(
'Checking %s baskets ...', nBaskets)
29 for iBasket
in range(nBaskets):
30 basket=branch.GetBasket(iBasket)
32 msg.warning(
'Basket %s of branch %s is corrupted.', iBasket, branch.GetName() )
35 listOfSubBranches=branch.GetListOfBranches()
36 msg.debug(
'Checking %s subbranches ...', listOfSubBranches.GetEntries())
37 for subBranch
in listOfSubBranches:
41 msg.debug(
'Branch %s looks ok.', branch.GetName())
47 listOfBranches=tree.GetListOfBranches()
49 msg.debug(
'Checking %s branches ...', listOfBranches.GetEntries())
51 for branch
in listOfBranches:
53 msg.warning(
'Tree %s is corrupted (branch %s ).', tree.GetName(), branch.GetName())
61 nEntries=tree.GetEntries()
63 msg.debug(
'Checking %s entries ...', nEntries)
65 for i
in range(nEntries):
66 if tree.GetEntry(i)<0:
67 msg.warning(
'Event %s of tree %s is corrupted.', i, tree.GetName())
71 if (i%printInterval)==0
and i>0:
72 msg.info(
'Validated %s events so far ...', i)
79 reader=RNTupleReader.Open(ntuple)
80 except Exception
as err:
81 msg.warning(
'Could not open ntuple %s: %s', ntuple, err)
84 msg.debug(
'Checking %s entries ...', reader.GetNEntries())
89 except Exception
as err:
90 msg.warning(
'Event %s of ntuple %s is corrupted: %s', i, reader.GetDescriptor().GetName(), err)
94 if (i%printInterval)==0
and i>0:
95 msg.info(
'Validated %s events so far ...', i)
100 """Check each page column by column.
104 pageSource = ntuple.MakePageSource()
106 except Exception
as err:
107 msg.warning(f
'Could not make page source for ntuple {ntuple!r}: {err}')
110 msg.debug(f
'Checking pages of ntuple {pageSource.GetNTupleName()!r}')
111 descriptor=pageSource.GetSharedDescriptorGuard().GetRef()
112 for columnDesc
in descriptor.GetColumnIterable():
113 columnPhysicalId = columnDesc.GetPhysicalId()
114 msg.debug(f
" Checking column {columnPhysicalId} ...")
115 for cluster
in descriptor.GetClusterIterable():
116 if not cluster.ContainsColumn(columnPhysicalId):
117 msg.debug(f
" Not in cluster {cluster.GetId()}")
119 nElements = cluster.GetColumnRange(columnPhysicalId).fNElements.fValue
121 for (pageNo, pageInfo)
in enumerate(cluster.GetPageRange(columnPhysicalId).fPageInfos):
122 msg.debug(f
" Page {pageNo}, {pageInfo.fNElements} elements in cluster {cluster.GetId()}")
123 buffer = bytearray(pageInfo.fLocator.fBytesOnStorage)
124 sealedPage = ROOT.Experimental.Detail.RPageStorage.RSealedPage(buffer,\
125 pageInfo.fLocator.fBytesOnStorage,\
128 pageSource.LoadSealedPage(columnPhysicalId,\
129 ROOT.Experimental.RClusterIndex(cluster.GetId(), idxInCluster),\
131 except Exception
as err:
132 msg.warning(f
'Could not load SealedPage {sealedPage!r} for page {pageNo} in cluster {cluster.GetId()}'
133 f
' of ntuple {pageSource.GetNTupleName()!r}: {err}')
135 idxInCluster += pageInfo.fNElements
136 if idxInCluster == nElements:
137 msg.debug(f
" {nElements} elements in cluster {cluster.GetId()}")
139 msg.warning(f
"NTuple {descriptor.GetName()!r}, cluster {cluster.GetId()}, column {cluster.GetColumnRange(columnPhysicalId).fPhysicalColumnId}: inconsistent meta-data")
146 from PyUtils
import PoolFile
149 msg.debug(
'Checking directory %s ...', directory.GetName())
151 listOfKeys=directory.GetListOfKeys()
153 msg.debug(
'Checking %s keys ... ', listOfKeys.GetEntries())
155 for key
in listOfKeys:
157 msg.debug(
'Looking at key %s ...', key.GetName())
158 msg.debug(
'Key is of class %s.', key.GetClassName())
160 the_object=directory.Get(key.GetName())
162 msg.warning(
"Can't get object of key %s.", key.GetName())
165 if requireTree
and not isinstance(the_object, TTree):
166 msg.warning(
"Object of key %s is not of class TTree!", key.GetName())
169 if isinstance(the_object,TTree):
171 msg.debug(
'Checking tree %s ...', the_object.GetName())
173 if depth == 0
and PoolFile.PoolOpts.TTreeNames.EventData == the_object.GetName():
174 nentries = the_object.GetEntries()
175 msg.debug(f
' contains {nentries} events')
177 if the_type==
'event':
180 elif the_type==
'basket':
184 msg.debug(
'Tree %s looks ok.', the_object.GetName())
186 if isinstance(the_object,RNTuple):
188 msg.debug(
'Checking ntuple of key %s ...', key.GetName())
191 reader=RNTupleReader.Open(the_object)
192 except Exception
as err:
193 msg.warning(
'Could not open ntuple %s: %s', the_object, err)
196 if depth == 0
and PoolFile.PoolOpts.RNTupleNames.EventData == reader.GetDescriptor().GetName():
197 nentries = reader.GetNEntries()
198 msg.debug(f
' contains {nentries} events')
200 if the_type==
'event':
203 elif the_type==
'basket':
207 msg.debug(
'NTuple of key %s looks ok.', key.GetName())
209 if isinstance(the_object, TDirectory):
210 if checkDirectory(the_object, the_type, requireTree, depth + 1)==1:
213 if depth == 0
and checkNEvents(directory.GetName(), nentries)==1:
216 msg.debug(
'Directory %s looks ok.', directory.GetName())
222 msg.info(
'Checking file %s ...', fileName)
224 isIMTEnabled = ROOT.ROOT.IsImplicitMTEnabled()
225 if not isIMTEnabled
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
226 nThreads =
int(os.environ[
'ATHENA_CORE_NUMBER'])
227 msg.info(f
"Setting the number of implicit ROOT threads to {nThreads}")
228 ROOT.ROOT.EnableImplicitMT(nThreads)
230 file_handle=TFile.Open(fileName)
233 msg.warning(
"Can't access file %s.", fileName)
236 if not file_handle.IsOpen():
237 msg.warning(
"Can't open file %s.", fileName)
240 if file_handle.IsZombie():
241 msg.warning(
"File %s is a zombie.", fileName)
245 if file_handle.TestBit(TFile.kRecovered):
246 msg.warning(
"File %s needed to be recovered.", fileName)
251 msg.warning(
"File %s is corrupted.", fileName)
256 msg.info(
"File %s looks ok.", fileName)
258 if not isIMTEnabled
and 'TRF_MULTITHREADED_VALIDATION' in os.environ
and 'ATHENA_CORE_NUMBER' in os.environ:
259 ROOT.ROOT.DisableImplicitMT()
265 """Check consistency of number of events in file with metadata.
267 fileName name of file to check consistency of
268 nEntries number of events in fileName (e.g., obtained by examining event data object)
269 return 0 in case of consistency, 1 otherwise
271 from PyUtils.MetaReader
import read_metadata
273 from AthenaCommon.Logging
import logging
as athlogging
274 from PyUtils.MetaReader
import msg
as metamsg
276 if msg.getEffectiveLevel() != logging.DEBUG:
277 metamsg.setLevel(athlogging.ERROR)
279 msg.debug(
'Checking number of events in file %s ...', fileName)
282 msg.debug(
' according to metadata: {0}'.
format(meta[
"nentries"]))
283 msg.debug(
' according to event data: {0}'.
format(nEntries))
284 if meta[
"nentries"]
and nEntries
and meta[
"nentries"] != nEntries \
285 or meta[
"nentries"]
and not nEntries \
286 or not meta[
"nentries"]
and nEntries:
287 msg.warning(f
' number of events ({nEntries}) inconsistent with metadata ({meta["nentries"]}) in file {fileName!r}.')
290 msg.debug(
" looks ok.")
294 print(
"Usage: validate filename type requireTree verbosity")
295 print(
"'type' must be either 'event' or 'basket'")
296 print(
"'requireTree' must be either 'true' or 'false'")
297 print(
"'verbosity' must be either 'on' or 'off'")
317 if the_type!=
"event" and the_type!=
"basket":
320 if requireTree==
"true":
322 elif requireTree==
"false":
328 msg.setLevel(logging.DEBUG)
329 elif verbosity==
"off":
330 msg.setLevel(logging.INFO)
334 rc=
checkFile(fileName,the_type, requireTree)
335 msg.debug(
'Returning %s', rc)
343 if __name__ ==
'__main__':
345 ch=logging.StreamHandler(sys.stdout)
346 formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
347 ch.setFormatter(formatter)