ATLAS Offline Software
Loading...
Searching...
No Matches
checkMetadata.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2
3# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
4
5import sys
6from collections import defaultdict, namedtuple
7
8import ROOT
9from AthenaCommon.Logging import logging
10from AthenaConfiguration.AllConfigFlags import initConfigFlags
11from AthenaConfiguration.AutoConfigFlags import GetFileMD
12from AthenaPython.PyAthena import Alg, StatusCode, py_svc
13from PyUtils.MetaReader import read_metadata
14from PyUtils.PoolFile import PoolOpts, isRNTuple
15
16
def __init__(self, name="ValidateMetadataAlg", metadata=None):
    """Set up the per-job bookkeeping of the validation algorithm.

    Args:
        name: algorithm instance name, forwarded to the base class.
        metadata: mapping holding the file-level summary; finalize() reads
            its "runNumbers" and "lumiBlocks" entries. A falsy value
            (None or an empty mapping) is replaced by an empty
            defaultdict(list).
    """
    super(ValidateMetadataAlg, self).__init__(name=name)
    # Every (run, lumi block, event number) triple seen so far.
    self.events = set()
    # Hashable record type so the triples can be stored in the set above.
    self.Event = namedtuple("Event", ["runNumber", "lumiBlock", "eventNumber"])
    self.metadata = metadata if metadata else defaultdict(list)
27
def initialize(self):
    """Look up the "StoreGateSvc" service and keep it on self.sg.

    Returns:
        StatusCode.Success.
    """
    store_gate = py_svc("StoreGateSvc")
    self.sg = store_gate
    return StatusCode.Success
31
def execute(self):
    """Check that the current event's run/lumi block/event number is unique.

    Returns:
        StatusCode.Failure when the store holds no xAOD::EventInfo keyed
        "EventInfo", or when the same (run, lumi block, event number)
        triple was already recorded in self.events; StatusCode.Success
        otherwise, after recording the triple.
    """
    # Without the event info there is nothing to validate.
    if not self.sg.contains("xAOD::EventInfo", "EventInfo"):
        logging.error("Could NOT find xAOD::EventInfo!")
        return StatusCode.Failure

    event_info = self.sg.retrieve("xAOD::EventInfo", "EventInfo")
    identifier = self.Event(
        runNumber=event_info.runNumber(),
        lumiBlock=event_info.lumiBlock(),
        eventNumber=event_info.eventNumber(),
    )

    # A repeated triple means the same event appears twice in the input.
    if identifier in self.events:
        logging.error("Event Data Validation FAILED!")
        return StatusCode.Failure

    self.events.add(identifier)
    return StatusCode.Success
51
def finalize(self):
    """Compare the runs and lumi blocks seen in events with the metadata summary.

    The set of run numbers collected in self.events must equal the set in
    self.metadata["runNumbers"], and likewise the lumi blocks must equal
    self.metadata["lumiBlocks"].

    Returns:
        StatusCode.Failure (after logging which quantity disagrees) on the
        first mismatch, StatusCode.Success when both sets agree.
    """
    seen_runs = {event.runNumber for event in self.events}
    expected_runs = set(self.metadata["runNumbers"])
    if seen_runs != expected_runs:
        # Report the disagreement; a bare failure status is hard to debug.
        logging.error(
            f"Run numbers in event data {sorted(seen_runs)} do not match "
            f"run numbers in metadata {sorted(expected_runs)}"
        )
        return StatusCode.Failure

    # Lumi blocks are only looked up once the run numbers agree, which keeps
    # the evaluation order of the original combined condition.
    seen_lumis = {event.lumiBlock for event in self.events}
    expected_lumis = set(self.metadata["lumiBlocks"])
    if seen_lumis != expected_lumis:
        logging.error(
            f"Lumi blocks in event data {sorted(seen_lumis)} do not match "
            f"lumi blocks in metadata {sorted(expected_lumis)}"
        )
        return StatusCode.Failure

    return StatusCode.Success
60
61
63 logging.info(f"Using input file {infile}")
64
65 current_file = ROOT.TFile(infile)
66 md = read_metadata(infile, mode="full", unique_tag_info_values=False)
67
68 nevts_esi = next(
69 (
70 md[infile][key]["numberOfEvents"]
71 for key, value in md[infile].items()
72 if isinstance(value, dict) and "numberOfEvents" in value
73 ),
74 None,
75 )
76
77 dataHeaderTree = current_file.Get(PoolOpts.TTreeNames.DataHeader)
78 if isinstance(dataHeaderTree, ROOT.TTree):
79 nevts_dh = dataHeaderTree.GetEntriesFast()
80 else:
81 if (
82 current_file.GetListOfKeys().Contains(PoolOpts.RNTupleNames.DataHeader)
83 and ROOT.gROOT.GetVersionInt() < 63100
84 ):
85 raise RuntimeError(
86 "ROOT ver. 6.31/01 or greater needed to read RNTuple files"
87 )
88 dataHeaderRNT = current_file.Get(PoolOpts.RNTupleNames.DataHeader)
89 if isRNTuple( dataHeaderRNT ):
90 nevts_dh = ROOT.Experimental.RNTupleReader.Open(dataHeaderRNT).GetNEntries()
91 else:
92 nevts_dh = None
93
94 if not (md[infile]["nentries"] == nevts_esi == nevts_dh):
95 logging.error(
96 "Number of events from EventStreamInfo inconsistent with number of entries in DataHeader"
97 )
98 return 1
99
100 tag_info = md[infile]["/TagInfo"]
101 if "project_name" in tag_info and isinstance(tag_info["project_name"], list):
102 if "IS_SIMULATION" in tag_info["project_name"] and any(
103 [item for item in tag_info["project_name"] if item.startswith("data")]
104 ):
105 logging.error("/TagInfo contains values reserved for both MC and data")
106 return 1
107 if (
108 len(
109 set(
110 item[5:6]
111 for item in tag_info["project_name"]
112 if item.startswith("data")
113 )
114 )
115 > 1
116 ):
117 logging.error("/TagInfo contains values from different data taking periods")
118 return 1
119 if (
120 "data_year" in tag_info
121 and isinstance(tag_info["data_year"], list)
122 and len(set(tag_info["data_year"])) > 1
123 ):
124 logging.error("/TagInfo contains values from different data taking periods")
125 return 1
126
127 return 0
128
129
def checkFileMetaData(file_names):
    """Check if FileMetaData is in all files.

    For each file the "metadata_items" mapping returned by GetFileMD is
    scanned for a value containing the substring "FileMetaData".

    Args:
        file_names: iterable of input file names.

    Returns:
        True when every file has at least one such item (also True for an
        empty list of files), False as soon as one file has none.
    """
    # Evaluate presence per file. Flattening the matches of all files into a
    # single list and calling all() on it would always give True, because a
    # file without a match simply contributes nothing to that list.
    return all(
        any(
            "FileMetaData" in item_type
            for item_type in GetFileMD(file_name)["metadata_items"].values()
        )
        for file_name in file_names
    )
144
145
if __name__ == "__main__":
    # Script to validate metadata for self-consistency and consistency with
    # event data:
    #  - check if the number of events from EventStreamInfo equals the number
    #    of entries in DataHeader
    #  - check if /TagInfo metadata contains inconsistent information
    #  - check if FileMetaData is present
    #  - check uniqueness of run/lumiblock/event number per event and against
    #    the summary in the FileMetaData
    #
    # Help: use as checkMetadata.py --filesInput=DAOD.pool.root

    flags = initConfigFlags()
    flags.Exec.EventPrintoutInterval = 1000
    flags.fillFromArgs()
    flags.lock()

    # Stop at the first input file whose metadata check returns non-zero and
    # use that value as the exit code.
    for infile in flags.Input.Files:
        status = validateInputMetadata(infile)
        if status:
            sys.exit(status)

    try:
        has_file_metadata = checkFileMetaData(flags.Input.Files)
    except Exception as exc:
        logging.error(f"Could not read metadata: {exc}")
        sys.exit(1)
    if not has_file_metadata:
        logging.error("FileMetaData missing")
        sys.exit(1)

    logging.info("Input file metadata looks OK")

    # Collect the run numbers and lumi blocks from all inputs; the validation
    # algorithm receives them through its metadata argument.
    metadata = defaultdict(list)
    for filename in flags.Input.Files:
        metadata["runNumbers"].extend(GetFileMD(filename).get("runNumbers", []))
        metadata["lumiBlocks"].extend(
            GetFileMD(filename).get("lumiBlockNumbers", [])
        )

    # Setup the main services
    from AthenaConfiguration.MainServicesConfig import MainServicesCfg

    cfg = MainServicesCfg(flags)

    # Setup input reading
    from AthenaPoolCnvSvc.PoolReadConfig import PoolReadCfg

    cfg.merge(PoolReadCfg(flags))

    # Setup Validation Algorithm
    validator = ValidateMetadataAlg("Validator", metadata=metadata)
    cfg.addEventAlgo(validator, sequenceName="AthAlgSeq")

    # Run the job
    job_status = cfg.run()

    # Exit accordingly: a failed job gives a truthy (non-zero) exit status
    sys.exit(not job_status.isSuccess())
__init__(self, name="ValidateMetadataAlg", metadata=None)
STL class.
bool add(const std::string &hname, TKey *tobj)
Definition fastadd.cxx:55
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition hcg.cxx:130
bool contains(const std::string &s, const std::string &regx)
does a string contain the substring
Definition hcg.cxx:114
checkFileMetaData(file_names)
void initialize()