ATLAS Offline Software
checkMetadata.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
4 
5 import sys
6 from collections import defaultdict, namedtuple
7 
8 import ROOT
9 from AthenaCommon.Logging import logging
10 from AthenaConfiguration.AllConfigFlags import initConfigFlags
11 from AthenaConfiguration.AutoConfigFlags import GetFileMD
12 from AthenaPython.PyAthena import Alg, StatusCode, py_svc
13 from PyUtils.MetaReader import read_metadata
14 from PyUtils.PoolFile import PoolOpts, isRNTuple
15 
16 
18  def __init__(self, name="ValidateMetadataAlg", metadata=None):
19  super(ValidateMetadataAlg, self).__init__(name=name)
20  self.events = set()
21  self.Event = namedtuple("Event", ["runNumber", "lumiBlock", "eventNumber"])
22  if metadata:
23  self.metadata = metadata
24  else:
25  self.metadata = defaultdict(list)
26  return
27 
28  def initialize(self):
29  self.sg = py_svc("StoreGateSvc")
30  return StatusCode.Success
31 
32  def execute(self):
33  # Read the event info and check the uniqueness of run/lb/evtnumber
34  if self.sg.contains("xAOD::EventInfo", "EventInfo"):
35  ei = self.sg.retrieve("xAOD::EventInfo", "EventInfo")
36  event = self.Event(
37  runNumber=ei.runNumber(),
38  lumiBlock=ei.lumiBlock(),
39  eventNumber=ei.eventNumber(),
40  )
41  if event in self.events:
42  logging.error("Event Data Validation FAILED!")
43  return StatusCode.Failure
44  else:
45  self.events.add(event)
46  else:
47  logging.error("Could NOT find xAOD::EventInfo!")
48  return StatusCode.Failure
49 
50  return StatusCode.Success
51 
52  def finalize(self):
53  if set([event.runNumber for event in self.events]) != set(
54  self.metadata["runNumbers"]
55  ) or set([event.lumiBlock for event in self.events]) != set(
56  self.metadata["lumiBlocks"]
57  ):
58  return StatusCode.Failure
59  return StatusCode.Success
60 
61 
def validateInputMetadata(infile):
    """Check the in-file metadata of one input file for self-consistency.

    The following checks are made, in order:
    - the "nentries" value from the metadata reader, the "numberOfEvents"
      of the first metadata entry that carries one, and the number of
      entries of the DataHeader container (TTree or RNTuple) all agree;
    - the /TagInfo "project_name" list does not mix "IS_SIMULATION" with
      names starting with "data";
    - the "data*" project names do not differ in their sixth character;
    - the /TagInfo "data_year" list holds a single distinct value.

    Args:
        infile: name of the input file to inspect.

    Returns:
        0 when every check passes, 1 as soon as one fails (an error is
        logged first).

    Raises:
        RuntimeError: the file holds an RNTuple DataHeader but the ROOT
            version in use is older than 6.31/01.
    """
    logging.info(f"Using input file {infile}")

    current_file = ROOT.TFile(infile)
    try:
        md = read_metadata(infile, mode="full", unique_tag_info_values=False)
        file_md = md[infile]

        # Event count from the first metadata entry that provides one
        nevts_esi = next(
            (
                value["numberOfEvents"]
                for value in file_md.values()
                if isinstance(value, dict) and "numberOfEvents" in value
            ),
            None,
        )

        # Entry count of the DataHeader container, TTree first, then RNTuple
        dataHeaderTree = current_file.Get(PoolOpts.TTreeNames.DataHeader)
        if isinstance(dataHeaderTree, ROOT.TTree):
            nevts_dh = dataHeaderTree.GetEntriesFast()
        else:
            has_rntuple_key = current_file.GetListOfKeys().Contains(
                PoolOpts.RNTupleNames.DataHeader
            )
            if has_rntuple_key and ROOT.gROOT.GetVersionInt() < 63100:
                raise RuntimeError(
                    "ROOT ver. 6.31/01 or greater needed to read RNTuple files"
                )
            dataHeaderRNT = current_file.Get(PoolOpts.RNTupleNames.DataHeader)
            if isRNTuple(dataHeaderRNT):
                nevts_dh = ROOT.Experimental.RNTupleReader.Open(
                    dataHeaderRNT
                ).GetNEntries()
            else:
                nevts_dh = None
    finally:
        # The file is only needed for the entry count; release it here
        # rather than keeping it open for the remaining checks.
        current_file.Close()

    if not (file_md["nentries"] == nevts_esi == nevts_dh):
        logging.error(
            "Number of events from EventStreamInfo inconsistent with number of entries in DataHeader"
        )
        return 1

    tag_info = file_md["/TagInfo"]
    if "project_name" in tag_info and isinstance(tag_info["project_name"], list):
        project_names = tag_info["project_name"]
        data_projects = [item for item in project_names if item.startswith("data")]

        if "IS_SIMULATION" in project_names and data_projects:
            logging.error("/TagInfo contains values reserved for both MC and data")
            return 1

        # NOTE(review): only the sixth character is compared, so e.g.
        # "data12..." and "data22..." are treated as the same period;
        # confirm whether item[4:6] was intended.
        if len({item[5:6] for item in data_projects}) > 1:
            logging.error("/TagInfo contains values from different data taking periods")
            return 1

    if (
        "data_year" in tag_info
        and isinstance(tag_info["data_year"], list)
        and len(set(tag_info["data_year"])) > 1
    ):
        logging.error("/TagInfo contains values from different data taking periods")
        return 1

    return 0
128 
129 
def checkFileMetaData(file_names):
    """Check if FileMetaData is in all files.

    For each file, the values of the "metadata_items" mapping returned by
    ``GetFileMD`` are searched for the substring "FileMetaData".

    Args:
        file_names: iterable of input file names.

    Returns:
        True if every file has at least one metadata item whose value
        contains "FileMetaData" (also True for an empty ``file_names``);
        False as soon as one file has none.
    """
    # any() per file, all() across files: a file without a matching item
    # must make the overall result False.
    return all(
        any(
            "FileMetaData" in item_type
            for item_type in GetFileMD(file_name)["metadata_items"].values()
        )
        for file_name in file_names
    )
144 
145 
if __name__ == "__main__":
    # Script to validate metadata for self-consistency and consistency with
    # event data:
    # - check if the number of events from EventStreamInfo equals the number
    #   of entries in DataHeader
    # - check if /TagInfo metadata contains inconsistent information
    # - check if FileMetaData is present
    # - check uniqueness of run/lumiblock/event number per event and against
    #   the summary in the FileMetaData
    #
    # Help: Use as checkMetadata.py --filesInput=DAOD.pool.root

    flags = initConfigFlags()
    flags.Exec.EventPrintoutInterval = 1000
    flags.fillFromArgs()
    flags.lock()

    # Stop at the first input file whose metadata is inconsistent and use
    # the returned code as the exit status.
    for infile in flags.Input.Files:
        sc = validateInputMetadata(infile)
        if sc:
            sys.exit(sc)

    try:
        if not checkFileMetaData(flags.Input.Files):
            logging.error("FileMetaData missing")
            sys.exit(1)
    except Exception as exc:
        # sys.exit raises SystemExit, which is not caught here
        logging.error(f"Could not read metadata: {exc}")
        sys.exit(1)

    logging.info("Input file metadata looks OK")

    # Collect the run numbers and lumi blocks advertised by all input files
    metadata = defaultdict(list)

    for filename in flags.Input.Files:
        file_md = GetFileMD(filename)
        metadata["runNumbers"] += file_md.get("runNumbers", [])
        metadata["lumiBlocks"] += file_md.get("lumiBlockNumbers", [])

    # Setup the main services
    from AthenaConfiguration.MainServicesConfig import MainServicesCfg

    cfg = MainServicesCfg(flags)

    # Setup input reading
    from AthenaPoolCnvSvc.PoolReadConfig import PoolReadCfg

    cfg.merge(PoolReadCfg(flags))

    # Setup Validation Algorithm
    cfg.addEventAlgo(
        ValidateMetadataAlg("Validator", metadata=metadata),
        sequenceName="AthAlgSeq",
    )

    # Run the job
    sc = cfg.run()

    # Exit accordingly
    sys.exit(0 if sc.isSuccess() else 1)
python.PyKernel.retrieve
def retrieve(aClass, aKey=None)
Definition: PyKernel.py:110
python.checkMetadata.ValidateMetadataAlg.__init__
def __init__(self, name="ValidateMetadataAlg", metadata=None)
Definition: checkMetadata.py:18
python.AutoConfigFlags.GetFileMD
def GetFileMD(filenames, allowEmpty=True, maxLevel='peeker')
Definition: AutoConfigFlags.py:65
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:53
python.checkMetadata.ValidateMetadataAlg.sg
sg
Definition: checkMetadata.py:29
python.checkMetadata.ValidateMetadataAlg
Definition: checkMetadata.py:17
python.checkMetadata.validateInputMetadata
def validateInputMetadata(infile)
Definition: checkMetadata.py:62
python.checkMetadata.ValidateMetadataAlg.execute
def execute(self)
Definition: checkMetadata.py:32
python.Bindings.py_svc
def py_svc(svcName, createIf=True, iface=None)
Definition: Control/AthenaPython/python/Bindings.py:98
python.checkMetadata.ValidateMetadataAlg.events
events
Definition: checkMetadata.py:20
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
contains
bool contains(const std::string &s, const std::string &regx)
does a string contain the substring
Definition: hcg.cxx:111
python.MainServicesConfig.MainServicesCfg
def MainServicesCfg(flags, LoopMgr='AthenaEventLoopMgr')
Definition: MainServicesConfig.py:260
add
bool add(const std::string &hname, TKey *tobj)
Definition: fastadd.cxx:55
python.checkMetadata.ValidateMetadataAlg.metadata
metadata
Definition: checkMetadata.py:23
python.checkMetadata.ValidateMetadataAlg.Event
Event
Definition: checkMetadata.py:21
python.checkMetadata.ValidateMetadataAlg.initialize
def initialize(self)
Definition: checkMetadata.py:28
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
python.checkMetadata.checkFileMetaData
def checkFileMetaData(file_names)
Definition: checkMetadata.py:130
TrigJetMonitorAlgorithm.items
items
Definition: TrigJetMonitorAlgorithm.py:79
Cut::all
@ all
Definition: SUSYToolsAlg.cxx:67
python.AllConfigFlags.initConfigFlags
def initConfigFlags()
Definition: AllConfigFlags.py:19
python.checkMetadata.ValidateMetadataAlg.finalize
def finalize(self)
Definition: checkMetadata.py:52
get
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition: hcg.cxx:127
python.PoolFile.isRNTuple
def isRNTuple(obj)
Definition: PoolFile.py:36
python.PoolReadConfig.PoolReadCfg
def PoolReadCfg(flags)
Definition: PoolReadConfig.py:69