6 from collections
import defaultdict, namedtuple
9 from AthenaCommon.Logging
import logging
10 from AthenaConfiguration.AllConfigFlags
import initConfigFlags
11 from AthenaConfiguration.AutoConfigFlags
import GetFileMD
12 from AthenaPython.PyAthena
import Alg, StatusCode, py_svc
13 from PyUtils.MetaReader
import read_metadata
14 from PyUtils.PoolFile
import PoolOpts, isRNTuple
18 def __init__(self, name="ValidateMetadataAlg", metadata=None):
19 super(ValidateMetadataAlg, self).
__init__(name=name)
21 self.
Event = namedtuple(
"Event", [
"runNumber",
"lumiBlock",
"eventNumber"])
30 return StatusCode.Success
34 if self.
sg.
contains(
"xAOD::EventInfo",
"EventInfo"):
35 ei = self.
sg.
retrieve(
"xAOD::EventInfo",
"EventInfo")
37 runNumber=ei.runNumber(),
38 lumiBlock=ei.lumiBlock(),
39 eventNumber=ei.eventNumber(),
42 logging.error(
"Event Data Validation FAILED!")
43 return StatusCode.Failure
47 logging.error(
"Could NOT find xAOD::EventInfo!")
48 return StatusCode.Failure
50 return StatusCode.Success
53 if set([event.runNumber
for event
in self.
events]) !=
set(
55 )
or set([event.lumiBlock
for event
in self.
events]) !=
set(
58 return StatusCode.Failure
59 return StatusCode.Success
63 logging.info(f
"Using input file {infile}")
65 current_file = ROOT.TFile(infile)
66 md =
read_metadata(infile, mode=
"full", unique_tag_info_values=
False)
70 md[infile][key][
"numberOfEvents"]
71 for key, value
in md[infile].
items()
72 if isinstance(value, dict)
and "numberOfEvents" in value
77 dataHeaderTree = current_file.Get(PoolOpts.TTreeNames.DataHeader)
78 if isinstance(dataHeaderTree, ROOT.TTree):
79 nevts_dh = dataHeaderTree.GetEntriesFast()
82 current_file.GetListOfKeys().Contains(PoolOpts.RNTupleNames.DataHeader)
83 and ROOT.gROOT.GetVersionInt() < 63100
86 "ROOT ver. 6.31/01 or greater needed to read RNTuple files"
88 dataHeaderRNT = current_file.Get(PoolOpts.RNTupleNames.DataHeader)
90 nevts_dh = ROOT.Experimental.RNTupleReader.Open(dataHeaderRNT).GetNEntries()
94 if not (md[infile][
"nentries"] == nevts_esi == nevts_dh):
96 "Number of events from EventStreamInfo inconsistent with number of entries in DataHeader"
100 tag_info = md[infile][
"/TagInfo"]
101 if "project_name" in tag_info
and isinstance(tag_info[
"project_name"], list):
102 if "IS_SIMULATION" in tag_info[
"project_name"]
and any(
103 [item
for item
in tag_info[
"project_name"]
if item.startswith(
"data")]
105 logging.error(
"/TagInfo contains values reserved for both MC and data")
111 for item
in tag_info[
"project_name"]
112 if item.startswith(
"data")
117 logging.error(
"/TagInfo contains values from different data taking periods")
120 "data_year" in tag_info
121 and isinstance(tag_info[
"data_year"], list)
122 and len(
set(tag_info[
"data_year"])) > 1
124 logging.error(
"/TagInfo contains values from different data taking periods")
131 """Check if FileMetaData is in all files"""
136 for file_name
in file_names
140 if "FileMetaData" in value
146 if __name__ ==
"__main__":
148 Script to validate metadata for self-consistency and consistency with event data:
149 - check if the number of events from EventStreamInfo equals the number of entries in DataHeader
150 - check if /TagInfo metadata contains inconsistent information
151 - check if FileMetaData is present
152 - check uniqueness of run/lumiblock/event number per event and against the summary in the FileMetaData
154 Help: Use as checkMetadata.py --filesInput=DAOD.pool.root"
158 flags.Exec.EventPrintoutInterval = 1000
167 logging.error(
"FileMetaData missing")
169 except Exception
as exc:
170 logging.error(f
"Could not read metadata: {exc}")
173 logging.info(
"Input file metadata looks OK")
175 metadata = defaultdict(list)
177 for filename
in flags.Input.Files:
178 metadata[
"runNumbers"] +=
GetFileMD(filename).
get(
"runNumbers", [])
179 metadata[
"lumiBlocks"] +=
GetFileMD(filename).
get(
"lumiBlockNumbers", [])
182 from AthenaConfiguration.MainServicesConfig
import MainServicesCfg
187 from AthenaPoolCnvSvc.PoolReadConfig
import PoolReadCfg
194 sequenceName=
"AthAlgSeq",
201 sys.exit(
not sc.isSuccess())