ATLAS Offline Software
MetaReader.py
1 # Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2 
3 import os
4 import re
5 from fnmatch import fnmatchcase
6 from AthenaCommon.Logging import logging
7 from AthenaConfiguration.AthConfigFlags import isGaudiEnv
8 from PyUtils.PoolFile import isRNTuple
9 from ROOT import gSystem
10 from AthenaConfiguration.Enums import Project
11 
12 
13 msg = logging.getLogger('MetaReader')
14 
15 # compile the regexes used below (e.g. in read_metadata() and _convert_value()) once, at module level, to optimize the code.
16 regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')
17 regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
18 regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')
19 regexXAODCutBookkeeperContainer = re.compile(r'^xAOD::CutBookkeeperContainer(_v\d+)?$')
20 regexXAODCutBookkeeperContainerAux = re.compile(r'^xAOD::CutBookkeeperAuxContainer(_v\d+)?$')
21 regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')
22 regexXAODFileMetaData = re.compile(r'^xAOD::FileMetaData(_v\d+)?$')
23 regexXAODFileMetaDataAux = re.compile(r'^xAOD::FileMetaDataAuxInfo(_v\d+)?$')
24 regexXAODFileMetaDataAuxDyn = re.compile(r'^(xAOD::)?FileMetaData.*AuxDyn(\.[a-zA-Z0-9]+)?$')
25 regexXAODTriggerMenu = re.compile(r'^DataVector<xAOD::TriggerMenu(_v\d+)?>$') # Run 2
26 regexXAODTriggerMenuAux = re.compile(r'^xAOD::TriggerMenuAuxContainer(_v\d+)?$') # Run 2
27 regexXAODTriggerMenuJson = re.compile(r'^DataVector<xAOD::TriggerMenuJson(_v\d+)?>$') # Run 3
28 regexXAODTriggerMenuJsonAux = re.compile(r'^xAOD::TriggerMenuJsonAuxContainer(_v\d+)?$') # Run 3
29 regexXAODTruthMetaData = re.compile(r'^DataVector<xAOD::TruthMetaData(_v\d+)?>$')
30 regexXAODTruthMetaDataAux = re.compile(r'^xAOD::TruthMetaDataAuxContainer(_v\d+)?$')
31 regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
32 # regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$')
33 regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
34 regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')
35 regex_URI_scheme = re.compile(r'^([A-Za-z0-9\+\.\-]+)\:')
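# Illustrative examples of what these patterns match (sample names made up for documentation only):
#   regex_BS_files   matches e.g. 'data18_13TeV:data18_13TeV.00348885.physics_Main.daq.RAW._lb0827._SFO-1._0001.data'
#   regex_URI_scheme matches e.g. 'root://eosatlas.cern.ch//eos/atlas/somefile.pool.root' (captures the 'root' scheme)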
36 
37 lite_primary_keys_to_keep = [
38  'lumiBlockNumbers', 'runNumbers', 'mc_event_number', 'mc_channel_number',
39  'eventTypes', 'processingTags', 'itemList']
40 lite_TagInfo_keys_to_keep = [
41  'beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag',
42  'AODFixVersion', 'project_name', 'mc_campaign', 'keywords']
43 
44 trigger_keys = [
45  'TriggerConfigInfo',
46  'TriggerMenu', 'TriggerMenuJson_BG', 'TriggerMenuJson_HLT', 'TriggerMenuJson_HLTMonitoring', 'TriggerMenuJson_HLTPS', 'TriggerMenuJson_L1', 'TriggerMenuJson_L1PS',
47  '/TRIGGER/HLT/Groups', '/TRIGGER/HLT/HltConfigKeys', '/TRIGGER/HLT/Menu', '/TRIGGER/HLT/PrescaleKey', '/TRIGGER/HLT/Prescales',
48  '/TRIGGER/LVL1/ItemDef', '/TRIGGER/LVL1/Lvl1ConfigKey', '/TRIGGER/LVL1/Menu', '/TRIGGER/LVL1/Prescales', '/TRIGGER/LVL1/Thresholds',
49  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenu', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT',
50  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS',
51  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS',
52 ]
53 
54 trigger_menu_json_map = {
55  "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1PSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS",
56  "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_BGAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG",
57  "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT",
58  "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTMonitoringAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring",
59  "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTPSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS",
60  "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1Aux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1",
61  "TriggerMenuJson_L1PSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS",
62  "TriggerMenuJson_BGAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG",
63  "TriggerMenuJson_HLTAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT",
64  "TriggerMenuJson_HLTMonitoringAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring",
65  "TriggerMenuJson_HLTPSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS",
66  "TriggerMenuJson_L1Aux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1",
67 }
68 
69 
70 def read_metadata(filenames, file_type = None, mode = 'lite', promote = None, meta_key_filter = None,
71  unique_tag_info_values = True, ignoreNonExistingLocalFiles=False):
72  """
73  This tool is independent of Athena framework and returns the metadata from a given file.
74  :param filenames: the input file (or list of files) from which metadata needs to be extracted.
75  :param file_type: the type of file: 'POOL' or 'BS' (bytestream: RAW, DRAW). Auto-detected when not provided.
76  :param mode: how much metadata to return: 'tiny' (only 'file_guid', 'file_size', 'file_type' and 'nentries'),
77  'lite' (the default), 'peeker', 'full' or 'iov'.
78  :return: a dictionary of metadata, keyed by input file name.
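 
  Example (an illustrative sketch, not executed here; 'myAOD.pool.root' is a placeholder file name and
  the module is assumed to be importable from its usual PyUtils location):
 
      from PyUtils.MetaReader import read_metadata
 
      md = read_metadata('myAOD.pool.root', mode='lite')['myAOD.pool.root']
      print(md['nentries'], md['file_guid'])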
79  """
80 
81  # make the mode available in the _convert methods
82  global _gbl_mode
83  _gbl_mode = mode
84 
85  from RootUtils import PyROOTFixes # noqa F401
86 
87  # Check if the input is a file or a list of files.
88  if isinstance(filenames, str):
89  filenames = [filenames]
90 
91  # Check if file_type is an allowed value
92  if file_type is not None:
93  if file_type not in ('POOL', 'BS'):
94  raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
95  else:
96  msg.info('Forced file_type: {0}'.format(file_type))
97 
98  # Check the value of mode parameter
99  if mode not in ('tiny', 'lite', 'full', 'peeker', 'iov'):
100  raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker", "iov" or "full"')
101 
102  if meta_key_filter is None:
103  meta_key_filter = []
104 
105  # Disable 'full' and 'iov' in non-Gaudi environments
106  if not isGaudiEnv():
107  if mode in ('full', 'iov'):
108  raise NameError('The following modes are not available in AnalysisBase: "iov" and "full"')
109 
110  msg.info('Current mode used: {0}'.format(mode))
111  msg.info('Current filenames: {0}'.format(filenames))
112 
113  if mode != 'full' and mode !='iov' and len(meta_key_filter) > 0:
114  raise NameError('It is possible to use the meta_key_filter option only for the "full" and "iov" modes')
115  if meta_key_filter:
116  msg.info('Filter used: {0}'.format(meta_key_filter))
117 
118  # create the storage object for metadata.
119  meta_dict = {}
120 
121  # ----- retrieve metadata from all input files -------------------------------------------------------------------#
122  for filename in filenames:
123  meta_dict[filename] = {}
124  current_file_type = None
125  # Determine the file_type of the input and store this information into meta_dict
126  if not file_type:
127  if os.path.isfile(filename):
128 
129  if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # NB: AccessPathName() returns True when the path is NOT accessible
130  msg.warn('Ignoring inaccessible file: {}'.format(filename))
131  continue
132 
133  with open(filename, 'rb') as binary_file:
134  magic_file = binary_file.read(4)
135 
136  if magic_file == 'root' or magic_file == b'root':
137  current_file_type = 'POOL'
138  meta_dict[filename]['file_type'] = 'POOL'
139 
140  elif Project.determine() in (
141  Project.AnalysisBase, Project.AthAnalysis):
142  raise RuntimeError(
143  f"{filename} is not a ROOT file, assumed bytestream"
144  ", this is not supported in Analysis releases")
145  else:
146  current_file_type = 'BS'
147  meta_dict[filename]['file_type'] = 'BS'
148 
149  # add information about the file_size of the input filename
150  meta_dict[filename]['file_size'] = os.path.getsize(filename)
151 
152  # determine the file type for the remote input files
153  else:
154  if regex_BS_files.match(filename):
155  current_file_type = 'BS'
156  meta_dict[filename]['file_type'] = 'BS'
157  else:
158  current_file_type = 'POOL'
159  meta_dict[filename]['file_type'] = 'POOL'
160 
161  # add information about the file_size of the input filename
162  meta_dict[filename]['file_size'] = None # None -> we can't read the file size for a remote file
163 
164  else:
165  current_file_type = file_type
166 
167  # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
168  if current_file_type == 'POOL':
169 
170  if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # NB: AccessPathName() returns True when the path is NOT accessible
171  msg.warn('Ignoring inaccessible file: {}'.format(filename))
172  continue
173 
174  import ROOT
175  # open the file using ROOT.TFile
176  current_file = ROOT.TFile.Open( _get_pfn(filename) )
177 
178  # get auto flush setting from the main EventData TTree
179  from PyUtils.PoolFile import PoolOpts
180  collectionTree = current_file.Get(PoolOpts.TTreeNames.EventData)
181  if isinstance(collectionTree, ROOT.TTree):
182  meta_dict[filename]['auto_flush'] = collectionTree.GetAutoFlush()
183 
184  # read and add the 'GUID' value
185  meta_dict[filename]['file_guid'] = _read_guid(filename)
186 
187  # read and add compression level and algorithm
188  meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
189  meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()
190 
191  if (
192  isRNTuple(md:=current_file.Get(PoolOpts.RNTupleNames.MetaData))
193  and mode != "tiny"
194  ):
195  msg.warning(
196  "Reading in-file metadata from RNTuple currently has limited support"
197  )
198  meta_dict[filename]["metadata_items"] = {}
199 
200  try:
201  from ROOT import RNTupleReader
202  except ImportError:
203  from ROOT.Experimental import RNTupleReader
204 
205  reader = RNTupleReader.Open(md)
206  entry = reader.CreateEntry()
207  reader.LoadEntry(0, entry)
208  auxes = {}
209  classes_with_aux = {
210  "xAOD::FileMetaData_v1",
211  "xAOD::FileMetaDataAuxInfo_v1",
212  "xAOD::TriggerMenuJsonAuxContainer_v1",
213  "DataVector<xAOD::TriggerMenuJson_v1>",
214  "xAOD::TruthMetaDataAuxContainer_v1",
215  "DataVector<xAOD::TruthMetaData_v1>",
216  "xAOD::CutBookkeeperContainer_v1",
217  "xAOD::CutBookkeeperAuxContainer_v1",
218  "xAOD::LumiBlockRangeAuxContainer_v1",
219  "DataVector<xAOD::LumiBlockRange_v1>",
220  }
221 
222  dynamic_fmd_items = {}
223 
224  meta_filter = get_meta_filter(mode, meta_key_filter)
225 
226  for field in reader.GetDescriptor().GetTopLevelFields():
227  normalizedName = field.GetFieldName()
228  if "index_ref" in normalizedName:
229  continue
230  if regexIOVMetaDataContainer.match(field.GetTypeName()):
231  # if field name is e.g. IOVMetaDataContainer_p1__Digitization_Parameters,
232  # strip the prefix and change underscores to slashes
233  normalizedName = (
234  field.GetFieldName()
235  .replace("IOVMetaDataContainer_p1_", "")
236  .replace("_", "/")
237  )
238  meta_dict[filename]["metadata_items"][normalizedName] = (
239  "IOVMetaDataContainer"
240  )
241  elif regexByteStreamMetadataContainer.match(field.GetTypeName()):
242  meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
243  "ByteStreamMetadataContainer"
244  )
245  elif regexEventStreamInfo.match(field.GetTypeName()):
246  meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
247  "EventStreamInfo"
248  )
249  elif regexXAODFileMetaData.match(field.GetTypeName()):
250  meta_dict[filename]["metadata_items"][
251  field.GetFieldName().replace("xAOD__", "xAOD::")
252  ] = field.GetTypeName()
253  elif regexXAODFileMetaDataAuxDyn.match(
254  normalizedName := field.GetFieldName()
255  .replace("xAOD__", "xAOD::")
256  .replace("AuxDyn:", "AuxDyn.")
257  ):
258  result = (
259  False
260  if entry[field.GetFieldName()] == "\x00"
261  else entry[field.GetFieldName()]
262  )
263  dynamic_fmd_items[normalizedName.split(".")[1]] = result
264  meta_dict[filename]["metadata_items"][normalizedName] = (
265  field.GetTypeName()
266  )
267  continue
268  elif regexXAODFileMetaDataAux.match(field.GetTypeName()):
269  meta_dict[filename]["metadata_items"][
270  field.GetFieldName()
271  .replace("xAOD__", "xAOD::")
272  .replace("Aux:", "Aux.")
273  ] = field.GetTypeName()
274  elif regexXAODTruthMetaData.match(field.GetTypeName()):
275  meta_dict[filename]["metadata_items"][
276  field.GetFieldName()
277  .replace("xAOD__", "xAOD::")
278  .replace("DataVector_", "DataVector<")
279  .replace("__Truth", ">_Truth")
280  ] = "TruthMetaData"
281  elif regexXAODTruthMetaDataAux.match(field.GetTypeName()):
282  meta_dict[filename]["metadata_items"][
283  field.GetFieldName()
284  .replace("xAOD__", "xAOD::")
285  .replace("Aux:", "Aux.")
286  ] = field.GetTypeName()
287  elif regexXAODEventFormat.match(field.GetTypeName()):
288  meta_dict[filename]["metadata_items"][
289  field.GetFieldName().replace("xAOD__", "xAOD::")
290  ] = field.GetTypeName()
291  elif regexXAODTriggerMenuJson.match(field.GetTypeName()):
292  meta_dict[filename]["metadata_items"][
293  field.GetFieldName()
294  .replace("xAOD__", "xAOD::")
295  .replace("DataVector_", "DataVector<")
296  .replace("__Trigger", ">_Trigger")
297  ] = field.GetTypeName()
298  elif regexXAODTriggerMenuJsonAux.match(field.GetTypeName()):
299  meta_dict[filename]["metadata_items"][
300  field.GetFieldName()
301  .replace("xAOD__", "xAOD::")
302  .replace("Aux:", "Aux.")
303  ] = field.GetTypeName()
304  elif regexXAODCutBookkeeperContainer.match(field.GetTypeName()):
305  meta_dict[filename]["metadata_items"][
306  field.GetFieldName()
307  .replace("xAOD__", "xAOD::")
308  .replace("DataVector_", "DataVector<")
309  .replace("__CutBookkeeper", ">_CutBookkeeper")
310  ] = field.GetTypeName()
311  elif regexXAODCutBookkeeperContainerAux.match(field.GetTypeName()):
312  meta_dict[filename]["metadata_items"][
313  field.GetFieldName()
314  .replace("xAOD__", "xAOD::")
315  .replace("Aux:", "Aux.")
316  ] = field.GetTypeName()
317  else:
318  meta_dict[filename]["metadata_items"][
319  field.GetFieldName().replace("Aux:", "Aux.")
320  ] = field.GetTypeName()
321 
322  if field.GetTypeName() in classes_with_aux:
323  # handle aux classes later
324  auxes[field.GetFieldName()] = field.GetTypeName()
325  continue
326 
327  if not should_keep_meta(
328  normalizedName, field.GetTypeName(), meta_filter
329  ):
330  continue
331 
332  try:
333  meta_dict[filename][normalizedName] = _convert_value(
334  entry[field.GetFieldName()]
335  )
336  except KeyError:
337  msg.warning(f"missing type {field.GetTypeName()}")
338 
339  meta_dict[filename]["metadata_items"] = denormalize_metadata_types(
340  meta_dict[filename]["metadata_items"]
341  )
342 
343  def _get_aux_base(aux_key: str) -> str:
344  # Remove known prefixes
345  key = aux_key
346  key = key.replace("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
347  key = key.replace("xAOD__FileMetaDataAuxInfo_v1_", "")
348  key = key.replace("xAOD__TruthMetaDataAuxContainer_v1_", "")
349  # Remove known suffixes
350  if key.endswith("Aux:"):
351  key = key[:-4]
352  elif key.endswith("Aux"):
353  key = key[:-3]
354  # Remove any trailing ':' or '_'
355  key = key.strip("_:")
356  return key
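# Illustrative sketch (assumed field names, documentation only):
#   _get_aux_base('xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux:') -> 'TriggerMenuJson_HLT'
#   _get_aux_base('xAOD__FileMetaDataAuxInfo_v1_FileMetaDataAux:')               -> 'FileMetaData'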
357 
358  def _get_main_base(main_key: str) -> str:
359  main_base = main_key
360  # For DataVectors
361  if main_key.startswith("DataVector_xAOD__TriggerMenuJson_v1__"):
362  main_base = main_key.replace(
363  "DataVector_xAOD__TriggerMenuJson_v1__", ""
364  )
365  # For FileMetaData
366  elif main_key.startswith("xAOD__FileMetaData_v1_"):
367  main_base = main_key.replace("xAOD__FileMetaData_v1_", "")
368  # For TruthMetaData
369  elif main_key.startswith("DataVector_xAOD__TruthMetaData_v1__"):
370  main_base = main_key.replace(
371  "DataVector_xAOD__TruthMetaData_v1__", ""
372  )
373  return main_base
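# Illustrative sketch (assumed field names, documentation only):
#   _get_main_base('DataVector_xAOD__TriggerMenuJson_v1__TriggerMenuJson_HLT') -> 'TriggerMenuJson_HLT'
#   _get_main_base('xAOD__FileMetaData_v1_FileMetaData')                       -> 'FileMetaData'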
374 
375  def _find_associated_pairs(auxes: dict) -> list[tuple[str, str]]:
376  # Build lookup tables
377  aux_map = {}
378  for k in auxes:
379  if "Aux" in k:
380  aux_map[_get_aux_base(k)] = k
381 
382  main_map = {}
383  for k in auxes:
384  base = _get_main_base(k)
385  if base:
386  main_map[base] = k
387 
388  # Find pairs
389  pairs = []
390  for base, aux_key in aux_map.items():
391  if base in main_map:
392  pairs.append((aux_key, main_map[base]))
393  return pairs
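# Illustrative sketch (assumed field names, documentation only): with the two FileMetaData fields above,
# _find_associated_pairs would yield the (aux store field, interface field) pair
#   ('xAOD__FileMetaDataAuxInfo_v1_FileMetaDataAux:', 'xAOD__FileMetaData_v1_FileMetaData')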
394 
395  for pair in _find_associated_pairs(auxes):
396  return_obj = _convert_value(
397  entry[pair[1]],
398  entry[pair[0]],
399  )
400  key = next(
401  (
402  k
403  for k, v in trigger_menu_json_map.items()
404  if v
405  == pair[1]
406  .replace("xAOD__", "xAOD::")
407  .replace("DataVector_", "DataVector<")
408  .replace("__Trigger", ">_Trigger")
409  ),
410  auxes[pair[0]],
411  )
412 
413  try:
414  key = (
415  key.replace("xAOD__", "xAOD::")
416  if key.count("_") <= 1
417  else key.replace("xAOD__", "xAOD::").rsplit("_", 2)[0]
418  )
419  except IndexError:
420  pass
421 
422  if not should_keep_meta(
423  pair[0]
424  .replace("xAOD__", "xAOD::")
425  .replace("DataVector_", "DataVector<")
426  .replace("__Trigger", ">_Trigger")
427  .replace("Aux:", "Aux."),
428  key,
429  meta_filter,
430  ):
431  continue
432 
433  if "TriggerMenuJson" in pair[0]:
434  if "RAWTriggerMenuJson" in return_obj:
435  key = (
436  pair[1]
437  if pair[0].startswith("Trigger")
438  else trigger_menu_json_map[pair[0]]
439  )
440  meta_dict[filename][key] = return_obj["RAWTriggerMenuJson"]
441  del return_obj["RAWTriggerMenuJson"]
442  if "TriggerConfigInfo" not in meta_dict[filename]:
443  meta_dict[filename]["TriggerConfigInfo"] = {}
444  if "dbkey" in return_obj:
445  meta_dict[filename]["TriggerConfigInfo"][
446  pair[0].split("_")[-1].replace("Aux:", "")
447  ] = {"key": return_obj["dbkey"], "name": return_obj["name"]}
448  del return_obj["dbkey"]
449  del return_obj["name"]
450  if "TriggerMenu" not in meta_dict[filename]:
451  meta_dict[filename]["TriggerMenu"] = {}
452  meta_dict[filename]["TriggerMenu"].update(return_obj)
453  elif "FileMetaData" in pair[0]:
454  if "FileMetaData" not in meta_dict[filename]:
455  meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
456  meta_dict[filename]["FileMetaData"].update(return_obj)
457  elif "TruthMetaData" in pair[0]:
458  if pair == ("TruthMetaDataAux:", "TruthMetaData"):
459  if "TruthMetaData" not in meta_dict[filename]:
460  meta_dict[filename]["TruthMetaData"] = {}
461  meta_dict[filename]["TruthMetaData"].update(return_obj)
462  else:
463  # for backward compatibility
464  meta_dict[filename][
465  pair[1]
466  .replace("xAOD__", "xAOD::")
467  .replace("DataVector_", "DataVector<")
468  .replace("__Truth", ">_Truth")
469  ] = {}
470  meta_dict[filename][
471  pair[0]
472  .replace("xAOD__", "xAOD::")
473  .replace("Aux:", "Aux.")
474  ] = {}
475  elif pair == ("CutBookkeepersAux:", "CutBookkeepers"):
476  meta_dict[filename]["CutBookkeepers"] = return_obj
477 
478  msg.debug(f"Read metadata from RNTuple: {meta_dict[filename]}")
479 
480  else:
481  # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
482  if mode != 'tiny':
483  # select, among all the trees, the one that contains the metadata, namely "MetaData"
484  metadata_tree = current_file.Get('MetaData')
485  # read the list of branches stored in the "MetaData" tree
486  metadata_branches = metadata_tree.GetListOfBranches()
487  nr_of_branches = metadata_branches.GetEntriesFast()
488 
489  # object to store the names of metadata containers and their corresponding class name.
490  meta_dict[filename]['metadata_items'] = {}
491 
492  meta_filter = get_meta_filter(mode, meta_key_filter)
493 
494  # store all persistent classes for metadata container existing in a POOL/ROOT file.
495  persistent_instances = {}
496  dynamic_fmd_items = {}
497 
498  # Protect non-Gaudi environments from meta-data classes they don't know about
499  if not isGaudiEnv():
500  metadata_tree.SetBranchStatus("*", False)
501 
502  for i in range(0, nr_of_branches):
503  branch = metadata_branches.At(i)
504  name = branch.GetName()
505  if name == 'index_ref':
506  # skip the index branch
507  continue
508 
509  class_name = branch.GetClassName()
510 
511  if regexIOVMetaDataContainer.match(class_name):
512  name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')
513 
514  if regexIOVMetaDataContainer.match(class_name):
515  meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
516  elif regexByteStreamMetadataContainer.match(class_name):
517  meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
518  elif regexEventStreamInfo.match(class_name):
519  meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
520  elif regexXAODFileMetaData.match(class_name):
521  meta_dict[filename]['metadata_items'][name] = 'FileMetaData'
522  elif regexXAODTruthMetaData.match(class_name):
523  meta_dict[filename]['metadata_items'][name] = 'TruthMetaData'
524  else:
525  type_name = class_name
526  if not type_name:
527  try:
528  type_name = branch.GetListOfLeaves()[0].GetTypeName()
529  except IndexError:
530  pass
531  meta_dict[filename]['metadata_items'][name] = type_name
532 
533  if len(meta_filter) > 0:
534  keep = False
535  for filter_key, filter_class in meta_filter.items():
536  if (filter_key.replace('/', '_') in name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
537  if 'CutBookkeepers' in filter_key:
538  keep = filter_key == name
539  if keep:
540  break
541  else:
542  keep = True
543  break
544 
545  if not keep:
546  continue
547  else:
548  # CutBookkeepers should always be filtered:
549  if 'CutBookkeepers' in name and name not in ['CutBookkeepers', 'CutBookkeepersAux.']:
550  continue
551 
552  if not isGaudiEnv():
553  metadata_tree.SetBranchStatus(f"{name}*", True)
554 
555  # assign the corresponding persistent class based on the name of the metadata container
556  if regexEventStreamInfo.match(class_name):
557  if class_name.endswith('_p1'):
558  persistent_instances[name] = ROOT.EventStreamInfo_p1()
559  elif class_name.endswith('_p2'):
560  persistent_instances[name] = ROOT.EventStreamInfo_p2()
561  else:
562  persistent_instances[name] = ROOT.EventStreamInfo_p3()
563  elif regexIOVMetaDataContainer.match(class_name):
564  persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
565  elif regexXAODEventFormat.match(class_name):
566  persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
567  elif regexXAODTriggerMenu.match(class_name) and _check_project() not in ['AthGeneration']:
568  persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
569  elif regexXAODTriggerMenuAux.match(class_name) and _check_project() not in ['AthGeneration']:
570  persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()
571  elif regexXAODTriggerMenuJson.match(class_name) and _check_project() not in ['AthGeneration']:
572  persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonContainer_v1()
573  elif regexXAODTriggerMenuJsonAux.match(class_name) and _check_project() not in ['AthGeneration']:
574  persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonAuxContainer_v1()
575  elif regexXAODCutBookkeeperContainer.match(class_name):
576  persistent_instances[name] = ROOT.xAOD.CutBookkeeperContainer_v1()
577  elif regexXAODCutBookkeeperContainerAux.match(class_name):
578  persistent_instances[name] = ROOT.xAOD.CutBookkeeperAuxContainer_v1()
579  elif regexXAODFileMetaData.match(class_name):
580  persistent_instances[name] = ROOT.xAOD.FileMetaData_v1()
581  elif regexXAODFileMetaDataAux.match(class_name):
582  persistent_instances[name] = ROOT.xAOD.FileMetaDataAuxInfo_v1()
583  elif regexXAODTruthMetaData.match(class_name):
584  persistent_instances[name] = ROOT.xAOD.TruthMetaDataContainer_v1()
585  elif regexXAODTruthMetaDataAux.match(class_name):
586  persistent_instances[name] = ROOT.xAOD.TruthMetaDataAuxContainer_v1()
587 
588  if name in persistent_instances:
589  branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
590 
591  # This creates a dict to store the dynamic attributes of the xAOD::FileMetaData
592  dynamicFMD = regexXAODFileMetaDataAuxDyn.match(name)
593  if dynamicFMD:
594  dynamicName = dynamicFMD.group().split('.')[-1]
595  dynamicType = regex_cppname.match(class_name)
596  if dynamicType:
597  # this should be a string
598  dynamic_fmd_items[dynamicName] = ROOT.std.string()
599  branch.SetAddress(ROOT.AddressOf(dynamic_fmd_items[dynamicName]))
600  else:
601  dynamic_fmd_items[dynamicName] = None
602 
603 
604  metadata_tree.GetEntry(0)
605 
606  # This loads the dynamic attributes of the xAOD::FileMetaData from the TTree
607  for key in dynamic_fmd_items:
608  if dynamic_fmd_items[key] is None:
609  try:
610  if key.startswith("is"):
611  # this is probably a boolean
612  dynamic_fmd_items[key] = getattr(metadata_tree, key) != '\x00'
613  else:
614  # this should be a float
615  dynamic_fmd_items[key] = getattr(metadata_tree, key)
616  except AttributeError:
617  # should not happen, but just ignore missing attributes
618  pass
619  else:
620  # convert ROOT.std.string objects to python equivalent
621  dynamic_fmd_items[key] = str(dynamic_fmd_items[key])
622 
623  # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
624  if meta_key_filter:
625  meta_dict[filename] = {}
626 
627  # read the metadata
628  for name, content in persistent_instances.items():
629  key = name
630  if hasattr(content, 'm_folderName'):
631  key = content.m_folderName
632 
633  # Some transition AODs contain both the Run2 and Run3 metadata formats. We only wish to read the Run3 format if such a file is encountered.
634  has_r3_trig_meta = ('TriggerMenuJson_HLT' in persistent_instances or 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT' in persistent_instances)
635  aux = None
636  if key.startswith('TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (AOD)
637  aux = persistent_instances[key+'Aux.']
638  elif key.startswith('DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (ESD)
639  menuPart = key.split('_')[-1]
640  aux = persistent_instances['xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_'+menuPart+'Aux.']
641  elif key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # AOD case (legacy support, HLT and L1 menus)
642  aux = persistent_instances['TriggerMenuAux.']
643  elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # ESD case (legacy support, HLT and L1 menus)
644  aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
645  elif (key == 'CutBookkeepers'
646  and 'CutBookkeepersAux.' in persistent_instances):
647  aux = persistent_instances['CutBookkeepersAux.']
648  elif key == 'CutBookkeepersAux.':
649  continue # Extracted using the interface object
650  elif (key == 'FileMetaData'
651  and 'FileMetaDataAux.' in persistent_instances):
652  aux = persistent_instances['FileMetaDataAux.']
653  elif (key == 'xAOD::FileMetaData_v1_FileMetaData'
654  and 'xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.' in persistent_instances):
655  aux = persistent_instances['xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.']
656  elif (key == 'TruthMetaData'
657  and 'TruthMetaDataAux.' in persistent_instances):
658  aux = persistent_instances['TruthMetaDataAux.']
659  elif key == 'TruthMetaDataAux.':
660  continue # Extracted using the interface object
661  elif 'Menu' in key and key.endswith('Aux.'):
662  continue # Extracted using the interface object
663 
664  return_obj = _convert_value(content, aux)
665 
666  if 'TriggerMenuJson' in key or ('TriggerMenu' in key and not has_r3_trig_meta):
667  if 'RAWTriggerMenuJson' in return_obj:
668  meta_dict[filename][key] = return_obj['RAWTriggerMenuJson']
669  del return_obj['RAWTriggerMenuJson']
670  if 'TriggerConfigInfo' not in meta_dict[filename]:
671  meta_dict[filename]['TriggerConfigInfo'] = {}
672  if 'dbkey' in return_obj:
673  meta_dict[filename]['TriggerConfigInfo'][key.split('_')[-1]] = {
674  'key' : return_obj['dbkey'],
675  'name': return_obj['name']
676  }
677  del return_obj['dbkey']
678  del return_obj['name']
679  if 'TriggerMenu' not in meta_dict[filename]:
680  meta_dict[filename]['TriggerMenu'] = {}
681  meta_dict[filename]['TriggerMenu'].update(return_obj)
682  elif "FileMetaData" in key:
683  if "FileMetaData" not in meta_dict[filename]:
684  meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
685  meta_dict[filename]["FileMetaData"].update(return_obj)
686  else:
687  meta_dict[filename][key] = return_obj
688 
689  try:
690  # get the number of events from EventStreamInfo
691  esi_dict = next(key for key, value in meta_dict[filename].items()
692  if isinstance(value, dict) and "numberOfEvents" in value and
693  meta_dict[filename]["metadata_items"][key] == "EventStreamInfo")
694  msg.debug(f"{esi_dict=}")
695  meta_dict[filename]["nentries"] = meta_dict[filename][esi_dict]["numberOfEvents"]
696  except StopIteration as err:
697  msg.debug(f"Caught {err=}, {type(err)=}, falling back on opening the DataHeader"
698  " Container to read the number of entries")
699  meta_dict[filename]['nentries'] = dataheader_nentries(current_file)
700  msg.debug(f"{meta_dict[filename]['nentries']=}")
701 
702  if unique_tag_info_values and mode=='iov':
703  unique_tag_info_values = False
704  msg.info('disabling "unique_tag_info_values" option for "iov" mode')
705 
706  # This is a workaround that temporarily fixes ATEAM-560, which originated from ATEAM-531
707  # ATEAM-560: https://its.cern.ch/jira/browse/ATEAM-560
708  # ATEAM-531: https://its.cern.ch/jira/browse/ATEAM-531
709  # This change removes all duplicate values present in some files due
710  # to the improper merging of two IOVMetaDataContainers.
711  if unique_tag_info_values:
712  msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
713  'This is a workaround to remove all duplicate values from "/TagInfo" key')
714  if '/TagInfo' in meta_dict[filename]:
715  for key, value in meta_dict[filename]['/TagInfo'].items():
716  if isinstance(value, list) and value:
717  if len(unique_values := set(value)) > 1:
718  msg.warn(
719  f"Found multiple values for {key}: {value}. "
720  "Looking for possible duplicates."
721  )
722  maybe_ok = False
723  if key == "AMITag":
724  # curate duplicates like: ['s3681_q453', 's3681_q453_'] or ["s3681_q453", "q453_s3681"]
725  unique_amitags = set()
726  for amitags in unique_values:
727  unique_amitags.add(
728  "_".join({tag for tag in amitags.split("_") if tag})
729  )
730  if len(unique_amitags) == 1:
731  maybe_ok = True
732  elif key == "beam_energy":
733  # handle duplicates like: ['6500000', '6500000.0'] or [3, "3"]
734  unique_energies = set()
735  for energy in unique_values:
736  try:
737  energy = int(energy)
738  except ValueError:
739  try:
740  energy = float(energy)
741  except ValueError:
742  pass
743  unique_energies.add(energy)
744  if len(unique_energies) == 1:
745  maybe_ok = True
746  elif key in ["AtlasRelease", "IOVDbGlobalTag", "AODFixVersion"]:
747  maybe_ok = True
748  if maybe_ok:
749  msg.warn(
750  f"Multiple values for {key} may mean the same, or "
751  "the input file was produced in a multi-step job. "
752  f"Ignoring all but the first entry: {key} = {value[0]}"
753  )
754  else:
755  raise ValueError(
756  f"{key} from /TagInfo contains more than 1 unique value: {value}"
757  )
758 
759  meta_dict[filename]['/TagInfo'][key] = value[0]
760 
761  if promote is None:
762  promote = mode == 'lite' or mode == 'peeker'
763 
764  # Filter the data and create a prettier output for the 'lite' mode
765  if mode == 'lite':
766  meta_dict = make_lite(meta_dict)
767 
768  if mode == 'peeker':
769  meta_dict = make_peeker(meta_dict)
770 
771  if promote:
772  meta_dict = promote_keys(meta_dict, mode)
773 
774  # In AnalysisBase the itemList must be grabbed another way
775  if not isGaudiEnv():
776  if isinstance(collectionTree, ROOT.TTree):
777  meta_dict[filename]['itemList'] = [ (b.GetClassName(), b.GetName()) for b in collectionTree.GetListOfBranches() ]
778 
779  # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
780  elif current_file_type == 'BS':
781 
782  if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and not os.path.isfile(filename):
783  msg.warn('Ignoring inaccessible file: {}'.format(filename))
784  continue
785 
786  import eformat
787 
788  # store the number of entries
789  bs = eformat.istream(filename)
790  meta_dict[filename]['nentries'] = bs.total_events
791 
792  # store the 'guid' value
793  data_reader = eformat.EventStorage.pickDataReader(filename)
794  assert data_reader, 'problem picking a data reader for file [%s]' % filename
795 
796  # set auto flush equivalent, which for BS is always 1
797  meta_dict[filename]['auto_flush'] = 1
798 
799  if hasattr(data_reader, 'GUID'):
800  meta_dict[filename]['file_guid'] = data_reader.GUID()
801 
802  # compression level and algorithm, for BS always ZLIB
803  meta_dict[filename]['file_comp_alg'] = 1
804  meta_dict[filename]['file_comp_level'] = 1
805 
806 
807  # for any mode other than 'tiny', grab the bytestream metadata
808  # ------------------------------------------------------------------------------------------------------#
809  if mode != "tiny":
810  bs_metadata = {}
811 
812  for md in data_reader.freeMetaDataStrings():
813  if md.startswith('Event type:'):
814  k = 'eventTypes'
815  v = []
816  if 'is sim' in md:
817  v.append('IS_SIMULATION')
818  else:
819  v.append('IS_DATA')
820 
821  if 'is atlas' in md:
822  v.append('IS_ATLAS')
823  else:
824  v.append('IS_TESTBEAM')
825 
826  if 'is physics' in md:
827  v.append('IS_PHYSICS')
828  else:
829  v.append('IS_CALIBRATION')
830 
831  bs_metadata[k] = tuple(v)
832 
833  elif md.startswith('GeoAtlas:'):
834  k = 'geometry'
835  v = md.split('GeoAtlas:')[1].strip()
836  bs_metadata[k] = v
837 
838  elif md.startswith('IOVDbGlobalTag:'):
839  k = 'conditions_tag'
840  v = md.split('IOVDbGlobalTag:')[1].strip()
841  bs_metadata[k] = v
842 
843  elif '=' in md:
844  k, v = md.split('=')
845  bs_metadata[k] = v
846 
847  bs_metadata['detectorMask'] = data_reader.detectorMask()
848  bs_metadata['runNumbers'] = data_reader.runNumber()
849  bs_metadata['lumiBlockNumbers'] = data_reader.lumiblockNumber()
850  bs_metadata['projectTag'] = data_reader.projectTag()
851  bs_metadata['stream'] = data_reader.stream()
852  #bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
853  beamTypeNbr= data_reader.beamType()
854  # According to info from Rainer and Giuseppe the beam type is
855  # 0: no beam
856  # 1: protons
857  # 2: ions
858  if (beamTypeNbr==0): bs_metadata['beamType'] = 'cosmics'
859  elif (beamTypeNbr==1 or beamTypeNbr==2): bs_metadata['beamType'] = 'collisions'
860  else: bs_metadata['beamType'] = 'unknown'
861 
862  bs_metadata['beamEnergy'] = data_reader.beamEnergy()
863 
864  meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
865  meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
866  meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
867  meta_dict[filename]['project_name'] = bs_metadata.get('projectTag', None)
868 
869  # Promote up one level
870  meta_dict[filename]['detectorMask'] = [bs_metadata.get('detectorMask', None)]
871  meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
872  meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
873  meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
874  meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
875  meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
876 
877  if not data_reader.good():
878  # event-less file...
879  meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
880  meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
881 
882  msg.debug(f"{meta_dict[filename]=}")
883  msg.debug(f"{len(bs)=}")
884  if len(bs):
885  evt = bs[0]
886  try:
887  evt.check()
888  meta_dict[filename]['processingTags'] = [tag.name for tag in evt.stream_tag()]
889  meta_dict[filename]['evt_number'] = [evt.global_id()]
890  meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
891  # ATLASRECTS-7126: If there is no valid lumiblock information
892  # in the ByteStream header, get the info from the first event.
893  if meta_dict[filename]['lumiBlockNumbers'] == [0]:
894  msg.debug('Taking the luminosity block info from the first event (%i)', evt.lumi_block())
895  meta_dict[filename]['lumiBlockNumbers'] = [evt.lumi_block()]
896  # ATLASRECTS-7126: If there is no valid run number information
897  # in the ByteStream header, get the info from the first event.
898  if meta_dict[filename]['runNumbers'] == [0]:
899  msg.debug('Taking the run number info from the first event (%i)', evt.run_no())
900  meta_dict[filename]['runNumbers'] = [evt.run_no()]
901  except RuntimeError as err:
902  msg.error("Issue while reading the first event of BS file %r: %r", filename, err)
903  else:
904  msg.debug(f"{meta_dict[filename]=}")
905  else:
906  msg.warn(f"Event-less BS {filename=}, will not read metadata information from the first event")
907 
908  # fix for ATEAM-122
909  if len(bs_metadata.get('eventTypes', '')) == 0: # see: ATMETADATA-6
910  evt_type = ['IS_DATA', 'IS_ATLAS']
911  if bs_metadata.get('stream', '').startswith('physics_'):
912  evt_type.append('IS_PHYSICS')
913  elif bs_metadata.get('stream', '').startswith('calibration_'):
914  evt_type.append('IS_CALIBRATION')
915  elif bs_metadata.get('projectTag', '').endswith('_calib'):
916  evt_type.append('IS_CALIBRATION')
917  else:
918  evt_type.append('Unknown')
919 
920  meta_dict[filename]['eventTypes'] = evt_type
921 
922  if mode == 'full':
923  meta_dict[filename]['bs_metadata'] = bs_metadata
924 
925  # ------ Throw an error if the user provide other file types -------------------------------------------------#
926  else:
927  msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
928  return None
929 
930  return meta_dict
931 
932 
933 def _check_project():
934  import os
935  if 'AthSimulation_DIR' in os.environ:
936  return 'AthSimulation'
937  if 'AthGeneration_DIR' in os.environ:
938  return 'AthGeneration'
939  return 'Athena'
940 
941 
942 def _get_pfn(filename):
943  """
944  Extract the actual filename if LFN or PFN notation is used
945  """
946  pfx = filename[0:4]
947  if pfx == 'PFN:':
948  return filename[4:]
949  if pfx == 'LFN:':
950  import subprocess, os
951  os.environ['POOL_OUTMSG_LEVEL'] = 'Error'
952  output = subprocess.check_output(['FClistPFN','-l',filename[4:]],text=True).split('\n')
953  if len(output) == 2:
954  return output[0]
955  msg.error( 'FClistPFN({0}) returned unexpected number of lines:'.format(filename) )
956  msg.error( '\n'.join(output) )
957  return filename
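# Illustrative sketch (placeholder paths, documentation only):
#   _get_pfn('PFN:/eos/atlas/somefile.pool.root') -> '/eos/atlas/somefile.pool.root'
#   _get_pfn('/local/somefile.pool.root')         -> '/local/somefile.pool.root' (returned unchanged)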
958 
959 
960 def _read_guid(filename):
961  """
962  Extracts the GUID (Globally Unique Identifier, used in POOL files and Grid catalogs) from a POOL file.
963  :param filename: the input file
964  :return: the guid value, None if unavailable
965  """
966  import ROOT
967  root_file = ROOT.TFile.Open( _get_pfn(filename) )
968  params = root_file.Get('##Params')
969  try:
970  from ROOT import RNTuple as rnt
971  except ImportError:
972  from ROOT.Experimental import RNTuple as rnt
973  if not params:
974  return
975  if not isinstance(params, ROOT.TTree) and not isinstance(params, rnt) and not isinstance(params, ROOT.TDirectory):
976  raise NotImplementedError(f"Cannot extract GUID from object {params!r} of type {type(params)!r}")
977 
978  regex = re.compile(r'\[NAME=(\w+)\]\[VALUE=(.*)\]', re.ASCII)
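# Illustrative sketch (made-up GUID, documentation only): each ##Params record looks like
#   '[NAME=FID][VALUE=A1B2C3D4-E5F6-1234-5678-9ABCDEF01234]'
# and the last FID record found is the one returned.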
979  fid = None
980 
981  if isinstance(params, ROOT.TTree):
982  for entry in params:
983  param = entry.GetLeaf('db_string').GetValueString()
984  result = regex.match(param)
985  if result and result.group(1) == 'FID' :
986  # don't exit yet, it's the last FID entry that counts
987  fid = result.group(2)
988  elif isinstance(params, rnt):
989  try:
990  from ROOT import RNTupleReader
991  except ImportError:
992  from ROOT.Experimental import RNTupleReader
993  reader = RNTupleReader.Open(params)
994  try:
995  entry = reader.CreateEntry()
996  except AttributeError:
997  entry = reader.GetModel().CreateEntry()
998  for idx in range(reader.GetNEntries()):
999  reader.LoadEntry(idx, entry)
1000  try:
1001  result = regex.match(str(entry['db_string']))
1002  except (AttributeError, TypeError) as err:
1003  # Early RNTuple implementations don't allow reading
1004  # strings on the python side; we might be hitting that here
1005  msg.error(f"Cannot read FID from ##Params in RNTuple w/ ROOT error: {err}")
1006  return None
1007  if result and result.group(1) == 'FID' :
1008  # don't exit yet, it's the last FID entry that counts
1009  fid = result.group(2)
1010  elif isinstance(params, ROOT.TDirectory):
1011  for key in params.GetListOfKeys():
1012  param = params.Get(key.GetName())
1013  result = regex.match(str(param))
1014  if result and result.group(1) == 'FID' :
1015  # don't exit yet, it's the last FID entry that counts
1016  fid = result.group(2)
1017 
1018  return fid
1019 
1020 
1021 def _extract_fields(obj):
1022  result = {}
1023 
1024  for meth in dir(obj):
1025  if not meth.startswith('_'):
1026  if meth.startswith('m_'):
1027 
1028  field_name = str(meth)[2:]
1029  field_value = getattr(obj, meth)
1030 
1031  result[field_name] = _convert_value(field_value)
1032 
1033  return result
1034 
1035 
1036 def _convert_value(value, aux = None):
1037  cl=value.__class__
1038  if hasattr(cl, '__cpp_name__'):
1039  result = regex_cppname.match(cl.__cpp_name__)
1040  if result:
1041  cpp_type = result.group(1)
1042  if cpp_type == 'vector' or cpp_type == 'std::vector':
1043  return [_convert_value(val) for val in value]
1044  elif cpp_type == 'set' or cpp_type == 'std::set':
1045  return {_convert_value(val) for val in value}
1046  elif cpp_type == 'pair' or cpp_type == 'std::pair':
1047  return _convert_value(value.first), _convert_value(value.second)
1048 
1049  # elif cpp_type == 'long':
1050  # return int(value)
1051 
1052  elif cpp_type == 'string' or cpp_type == 'std::string':
1053  return str(value)
1054 
1055  elif cl.__cpp_name__ == "_Bit_reference":
1056  return bool(value)
1057 
1058  # special case which extracts data in a better format from IOVPayloadContainer_p1 class
1059  elif cl.__cpp_name__ == 'IOVMetaDataContainer_p1':
1060  return _extract_fields_iovmdc(value)
1061 
1062  elif cl.__cpp_name__ == 'IOVPayloadContainer_p1':
1063  global _gbl_mode
1064  if _gbl_mode == 'iov':
1065  return _extract_iov_detailed(value)
1066  else:
1067  return _extract_fields_iov( value, range(value.m_attrIndexes.size()) )
1068 
1069  elif cl.__cpp_name__ == 'xAOD::EventFormat_v1':
1070  return _extract_fields_ef(value)
1071  elif cl.__cpp_name__ == 'xAOD::CutBookkeeperContainer_v1':
1072  return _extract_fields_cbk(interface=value, aux=aux)
1073  elif cl.__cpp_name__ == 'xAOD::FileMetaData_v1':
1074  return _extract_fields_fmd(interface=value, aux=aux)
1075  elif cl.__cpp_name__ == 'DataVector<xAOD::TruthMetaData_v1>':
1076  return _extract_fields_tmd(interface=value, aux=aux)
1077 
1078  elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenu_v1>' :
1079  return _extract_fields_triggermenu(interface=value, aux=aux)
1080 
1081  elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenuJson_v1>' :
1082  return _extract_fields_triggermenujson(interface=value, aux=aux)
1083 
1084  elif (cl.__cpp_name__ == 'EventStreamInfo_p1' or
1085  cl.__cpp_name__ == 'EventStreamInfo_p2' or
1086  cl.__cpp_name__ == 'EventStreamInfo_p3'):
1087  return _extract_fields_esi(value)
1088 
1089  elif (cl.__cpp_name__ == 'EventType_p1' or
1090  cl.__cpp_name__ == 'EventType_p3'):
1091  fields = _extract_fields(value)
1092  fields = _convert_event_type_bitmask(fields)
1093  fields = _convert_event_type_user_type(fields)
1094  return fields
1095 
1096  elif regex_persistent_class.match(cl.__cpp_name__):
1097  return _extract_fields(value)
1098 
1099  return value
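# Illustrative sketch (documentation only; assumes 'import ROOT' and its usual STL proxies):
#   v = ROOT.std.vector('std::string')(); v.push_back('StreamAOD')
#   _convert_value(v)                      -> ['StreamAOD']
#   _convert_value(ROOT.std.string('abc')) -> 'abc'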
1100 
1101 
1102 def _get_attribute_val(iov_container, attr_name, attr_idx):
1103  type_idx = attr_idx.typeIndex()
1104  obj_idx = attr_idx.objIndex()
1105 
1106  attr_value = None
1107 
1108  if type_idx == 0:
1109  attr_value = bool(iov_container.m_bool[obj_idx])
1110  elif type_idx == 1:
1111  attr_value = int(iov_container.m_char[obj_idx])
1112  elif type_idx == 2:
1113  attr_value = int(iov_container.m_unsignedChar[obj_idx])
1114  elif type_idx == 3:
1115  attr_value = int(iov_container.m_short[obj_idx])
1116  elif type_idx == 4:
1117  attr_value = int(iov_container.m_unsignedShort[obj_idx])
1118  elif type_idx == 5:
1119  attr_value = int(iov_container.m_int[obj_idx])
1120  elif type_idx == 6:
1121  attr_value = int(iov_container.m_unsignedInt[obj_idx])
1122  elif type_idx == 7:
1123  attr_value = int(iov_container.m_long[obj_idx])
1124  elif type_idx == 8:
1125  attr_value = int(iov_container.m_unsignedLong[obj_idx])
1126  elif type_idx == 9:
1127  attr_value = int(iov_container.m_longLong[obj_idx])
1128  elif type_idx == 10:
1129  attr_value = int(iov_container.m_unsignedLongLong[obj_idx])
1130  elif type_idx == 11:
1131  attr_value = float(iov_container.m_float[obj_idx])
1132  elif type_idx == 12:
1133  attr_value = float(iov_container.m_double[obj_idx])
1134  elif type_idx == 13:
1135  # skipping this type because in IOVPayloadContainer_p1.h (line 120) it is commented out and not considered
1136  pass
1137  elif type_idx == 14:
1138  attr_value = str(iov_container.m_string[obj_idx])
1139  # Cleaning class name from value
1140  if attr_value.startswith('IOVMetaDataContainer_p1_'):
1141  attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
1142  if attr_value.startswith('_'):
1143  attr_value = attr_value.replace('_', '/')
1144  # Now it is clean
1145  elif type_idx == 15:
1146  attr_value = int(iov_container.m_date[obj_idx])
1147  elif type_idx == 16:
1148  attr_value = int(iov_container.m_timeStamp[obj_idx])
1149  else:
1150  raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
1151 
1152  return attr_value
1153 
1154 
1155 def _extract_fields_iov( iov_container, idx_range ):
1156  result = {}
1157 
1158  for idx in idx_range:
1159  attr_idx = iov_container.m_attrIndexes[idx]
1160  name_idx = attr_idx.nameIndex()
1161  attr_name = iov_container.m_attrName[name_idx]
1162  attr_value = _get_attribute_val(iov_container, attr_name, attr_idx)
1163 
1164  if attr_name not in result:
1165  result[attr_name] = [attr_value]
1166  else:
1167  result[attr_name].append(attr_value)
1168 
1169  max_element_count = 0
1170  for content in result.values():
1171  if len(content) > max_element_count:
1172  max_element_count = len(content)
1173 
1174  if max_element_count <= 1:
1175  for name, content in result.items():
1176  if len(content) > 0:
1177  result[name] = content[0]
1178  else:
1179  result[name] = None
1180 
1181  return result
1182 
1183 
1184 def _extract_iov_detailed(iov_container):
1185  def iovtostr(t):
1186  # break iov time into high and low halves (run number usually in the higher half)
1187  return "({h}:{l})".format(h=t>>32, l=t&((1<<32)-1)) # note: "^" is XOR in Python, so use (1<<32)-1 as the low-half mask
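# Illustrative sketch: for an IOV time packed as (run << 32) | lumiblock,
#   iovtostr((360026 << 32) | 1) -> '(360026:1)'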
1188 
1189  def extract_list_collection(iov_container, listCollection ):
1190  result = {}
1191  ln = 0
1192  for list in listCollection.m_attrLists:
1193  ln = ln + 1
1194  lname = 'List {ln}: iov=[{s} ,{e}]; Channel#={ch}'.format(
1195  ln=ln, s=iovtostr(list.m_range.m_start),
1196  e=iovtostr(list.m_range.m_stop),
1197  ch=list.m_channelNumber )
1198  result[ lname ] = _extract_fields_iov( iov_container, range(list.m_firstIndex, list.m_lastIndex) )
1199  return result
1200 
1201  result = {}
1202  pn = 0
1203  for listCollection in iov_container.m_payloadVec:
1204  pn = pn + 1
1205  pname = 'IOV range {n}: [{s}, {e}]'.format(n=pn, s=iovtostr(listCollection.m_start),
1206  e=iovtostr(listCollection.m_stop))
1207  result[ pname ] = extract_list_collection(iov_container, listCollection )
1208  return result
1209 
1210 
1211 def _extract_fields_iovmdc(value):
1212  return _convert_value(value.m_payload)
1213 
1214 
1215 def _extract_fields_esi(value):
1216  result = {}
1217 
1218  result['eventTypes'] = []
1219  for eventType in value.m_eventTypes:
1220  result['eventTypes'].append(_convert_value(eventType))
1221 
1222  result['numberOfEvents'] = value.m_numberOfEvents
1223  result['runNumbers'] = list(value.m_runNumbers)
1224  result['lumiBlockNumbers'] = list(value.m_lumiBlockNumbers)
1225  result['processingTags'] = [str(v) for v in value.m_processingTags]
1226  result['itemList'] = []
1227 
1228  # Get the class name in the repository with CLID <clid>
1229  from CLIDComps.clidGenerator import clidGenerator
1230  cgen = clidGenerator("")
1231  for clid, sgkey in value.m_itemList:
1232  if isinstance(sgkey, bytes):
1233  sgkey = sgkey.decode()
1234  result['itemList'].append((cgen.getNameFromClid(clid), sgkey))
1235 
1236  return result
1237 
1238 
1239 def _extract_fields_ef(value):
1240  result = {}
1241 
1242  for ef_element in value:
1243  result[ef_element.first] = ef_element.second.className()
1244 
1245  return result
1246 
1247 
1248 def _extract_fields_cbk(interface=None, aux=None):
1249  """Extract CutBookkeeper content into dictionary
1250 
1251  This function takes the CutBookkeeperContainer_v1 and CutBookkeeperAuxContainer_v1 objects.
1252  It makes sure that the interface object uses the auxiliary object as store.
1253  Args:
1254  interface (CutBookkeeperContainer_v1): the interface class
1255  aux (CutBookkeeperAuxContainer_v1): auxiliary container object
1256  Returns
1257  dict: with the cycle number and last stream
1258  """
1259  if not interface or not aux:
1260  return {}
1261  interface.setStore(aux)
1262 
1263  max_cycle = -1
1264  input_stream = ''
1265 
1266  for cbk in interface:
1267  current_cycle = int(cbk.cycle())
1268  if current_cycle > max_cycle:
1269  max_cycle = current_cycle
1270  input_stream = str(cbk.inputStream())
1271 
1272  result = {
1273  'currentCutCycle': max_cycle,
1274  'currentCutInputStream': input_stream,
1275  }
1276  return result
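# Illustrative sketch (made-up values, documentation only): the returned dictionary looks like
#   {'currentCutCycle': 0, 'currentCutInputStream': 'StreamAOD'}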
1277 
1278 
1279 def _extract_fields_fmd(interface=None, aux=None):
1280  """Turn static FileMetaData content into dictionary
1281 
1282  This function takes the FileMetaData_v1 and FileMetaDataAuxInfo_v1 objects.
1283  It makes sure that the interface object uses the auxiliary object as store.
1284  Next the two static variables of FileMetaDataAuxInfo_v1 are retrieved and
1285  added to the dictionary that is returned.
1286  Args:
1287  interface (FileMetaData_v1): the interface class
1288  aux (FileMetaDataAuxInfo_v1): auxiliary container object
1289  Returns
1290  dict: with the production release and dataType
1291  """
1292  import ROOT
1293  if not interface or not aux:
1294  return {}
1295  interface.setStore(aux)
1296  metaContent = {
1297  "productionRelease": ROOT.std.string(),
1298  "dataType": ROOT.std.string(),
1299  "runNumbers": ROOT.std.vector('unsigned int')(),
1300  "lumiBlocks": ROOT.std.vector('unsigned int')(),
1301  }
1302  # Note: using this for dynamic attributes returns empty content
1303  for k, v in metaContent.items():
1304  try:
1305  interface.value(getattr(interface, k), v)
1306  except AttributeError:
1307  interface.value(k, v)
1308  # Now return python objects
1309  result = {k: str(v) for k, v in metaContent.items() if type(v) is ROOT.std.string}
1310  result.update({k: list(v) for k, v in metaContent.items() if type(v) is ROOT.std.vector('unsigned int')})
1311  return result
1312 
1313 
1314 def _extract_fields_tmd(interface=None, aux=None):
1315  import ROOT
1316  BadAuxVarException = ROOT.SG.ExcBadAuxVar
1317  """Extract TruthMetaData content into dictionary
1318 
1319  This function takes the TruthMetaDataContainer_v1 and TruthMetaDataAuxContainer_v1 objects.
1320  It makes sure that the interface object uses the auxiliary object as store.
1321  Args:
1322  interface (TruthMetaDataContainer_v1): the interface class
1323  aux (TruthMetaDataAuxContainer_v1): auxiliary container object
1324  Returns
1325  dict
1326  """
1327  if not interface or not aux:
1328  return {}
1329  interface.setStore(aux)
1330 
1331  # return the first as we do not really expect more than one
1332  result = {}
1333  for tmd in interface:
1334  result['mcChannelNumber'] = tmd.mcChannelNumber()
1335 
1336  try:
1337  result['weightNames'] = list(tmd.weightNames())
1338  except BadAuxVarException:
1339  result['weightNames'] = []
1340 
1341  try:
1342  result['lhefGenerator'] = str(tmd.lhefGenerator())
1343  except BadAuxVarException:
1344  result['lhefGenerator'] = ''
1345 
1346  try:
1347  result['generators'] = str(tmd.generators())
1348  except BadAuxVarException:
1349  result['generators'] = ''
1350 
1351  try:
1352  result['evgenProcess'] = str(tmd.evgenProcess())
1353  except BadAuxVarException:
1354  result['evgenProcess'] = ''
1355 
1356  try:
1357  result['evgenTune'] = str(tmd.evgenTune())
1358  except BadAuxVarException:
1359  result['evgenTune'] = ''
1360 
1361  try:
1362  result['hardPDF'] = str(tmd.hardPDF())
1363  except BadAuxVarException:
1364  result['hardPDF'] = ''
1365 
1366  try:
1367  result['softPDF'] = str(tmd.softPDF())
1368  except BadAuxVarException:
1369  result['softPDF'] = ''
1370 
1371  return result
1372 
1373 
1374 """ Note: Deprecated. Legacy support for Run 2 AODs produced in release 21 or in release 22 prior to April 2021
1375 """
1376 def _extract_fields_triggermenu(interface, aux):
1377  if aux is None:
1378  return {}
1379 
1380  L1Items = []
1381  HLTChains = []
1382 
1383  try:
1384  interface.setStore( aux )
1385  if interface.size() > 0:
1386  # We make the assumption that the first stored SMK is
1387  # representative of all events in the input collection.
1388  firstMenu = interface.at(0)
1389  L1Items = [ _convert_value(item) for item in firstMenu.itemNames() ]
1390  HLTChains = [ _convert_value(chain) for chain in firstMenu.chainNames() ]
1391  except Exception as err: # noqa: F841
1392  msg.warn('Problem reading xAOD::TriggerMenu')
1393 
1394  result = {}
1395  result['L1Items'] = L1Items
1396  result['HLTChains'] = HLTChains
1397 
1398  return result
1399 
1400 def _extract_fields_triggermenujson(interface, aux):
1401  result = {}
1402 
1403  try:
1404  interface.setStore( aux )
1405  if interface.size() > 0:
1406  # We make the assumption that the first stored SMK is
1407  # representative of all events in the input collection.
1408  firstMenu = interface.at(0)
1409  import json
1410  decoded = json.loads(firstMenu.payload())
1411  result['RAWTriggerMenuJson'] = firstMenu.payload()
1412  result['name'] = firstMenu.name()
1413  result['dbkey'] = firstMenu.key()
1414  if decoded['filetype'] == 'hltmenu':
1415  result['HLTChains'] = [ _convert_value(chain) for chain in decoded['chains'] ]
1416  elif decoded['filetype'] == 'l1menu':
1417  result['L1Items'] = [ _convert_value(item) for item in decoded['items'] ]
1418  elif decoded['filetype'] in ['bunchgroupset', 'hltprescale', 'l1prescale', 'hltmonitoringsummary']:
1419  return result
1420 
1421  else:
1422  msg.warn('Got an xAOD::TriggerMenuJson called {0} but only expecting hltmenu or l1menu'.format(decoded['filetype']))
1423  return {}
1424 
1425  except Exception as err: # noqa: F841
1426  msg.warn('Problem reading xAOD::TriggerMenuJson')
1427 
1428  return result
1429 
1430 def _convert_event_type_user_type(value):
1431  if 'user_type' in value:
1432  items = value['user_type'].split('#')[3:]
1433  for i in range(0, len(items), 2):
1434  value[items[i]] = _convert_value(items[i+1])
1435  return value
1436 
1437 def _convert_event_type_bitmask(value):
1438 
1439  types = None
1440  for key in value:
1441  if key == 'bit_mask':
1442  val = value[key]
1443 
1444  bitmask_length = len(val)
1445 
1446  is_simulation = False
1447  is_testbeam = False
1448  is_calibration = False
1449 
1450  if bitmask_length > 0: # ROOT.EventType.IS_SIMULATION
1451  is_simulation = val[0]
1452 
1453  if bitmask_length > 1: # ROOT.EventType.IS_TESTBEAM
1454  is_testbeam = val[1]
1455 
1456  if bitmask_length > 2: # ROOT.EventType.IS_CALIBRATION:
1457  is_calibration = val[2]
1458 
1459  types = [
1460  'IS_SIMULATION' if is_simulation else 'IS_DATA',
1461  'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
1462  'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
1463  ]
1464 
1465  value['type'] = types
1466  return value
1467 
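# Editor's sketch (not part of the original MetaReader.py): how the bit mask is turned
# into event-type strings. The input dictionary below is hypothetical.
example_event_type = {'bit_mask': [1, 0, 0]}        # only the IS_SIMULATION bit is set
_convert_event_type_bitmask(example_event_type)
# example_event_type['type'] -> ['IS_SIMULATION', 'IS_ATLAS', 'IS_PHYSICS']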
1468 
1469 def make_lite(meta_dict):
1470  for filename, file_content in meta_dict.items():
1471  for key in file_content:
1472  if key in meta_dict[filename]['metadata_items'] and regexEventStreamInfo.match(meta_dict[filename]['metadata_items'][key]):
1473  for item in list(meta_dict[filename][key]):
1474  if item not in lite_primary_keys_to_keep:
1475  meta_dict[filename][key].pop(item)
1476 
1477  if '/TagInfo' in file_content:
1478 
1479 
1480  for item in list(meta_dict[filename]['/TagInfo']):
1481  if item not in lite_TagInfo_keys_to_keep:
1482  meta_dict[filename]['/TagInfo'].pop(item)
1483  return meta_dict
1484 
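# Editor's sketch (not part of the original MetaReader.py): make_lite() prunes the
# EventStreamInfo and /TagInfo blocks down to the keep-lists defined at the top of
# this module. The metadata dictionary below is hypothetical.
example_meta = {
    'file.pool.root': {
        'metadata_items': {'StreamAOD': 'EventStreamInfo_p3'},
        'StreamAOD': {'runNumbers': [430536], 'numberOfEvents': 100, 'itemList': []},
        '/TagInfo': {'beam_type': 'collisions', 'AMITag': 'f1234'},
    }
}
make_lite(example_meta)
# example_meta['file.pool.root']['StreamAOD'] -> {'runNumbers': [430536], 'itemList': []}
# example_meta['file.pool.root']['/TagInfo']  -> {'beam_type': 'collisions'}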
1485 
1486 def make_peeker(meta_dict):
1487  for filename, file_content in meta_dict.items():
1488  for key in file_content:
1489  if key in meta_dict[filename]['metadata_items'] and regexEventStreamInfo.match(meta_dict[filename]['metadata_items'][key]):
1490  keys_to_keep = [
1491  'lumiBlockNumbers',
1492  'runNumbers',
1493  'mc_event_number',
1494  'mc_channel_number',
1495  'eventTypes',
1496  'processingTags',
1497  'itemList'
1498  ]
1499  for item in list(meta_dict[filename][key]):
1500  if item not in keys_to_keep:
1501  meta_dict[filename][key].pop(item)
1502 
1503  if '/TagInfo' in file_content:
1504  keys_to_keep = [
1505  'beam_energy',
1506  'beam_type',
1507  'GeoAtlas',
1508  'IOVDbGlobalTag',
1509  'AODFixVersion',
1510  'AMITag',
1511  'project_name',
1512  'triggerStreamOfFile',
1513  'AtlasRelease',
1514  'specialConfiguration',
1515  'mc_campaign',
1516  'hepmc_version',
1517  'generators',
1518  'keywords',
1519  'data_year',
1520  ]
1521  for item in list(meta_dict[filename]['/TagInfo']):
1522  if item not in keys_to_keep:
1523  meta_dict[filename]['/TagInfo'].pop(item)
1524 
1525  if '/Simulation/Parameters' in file_content:
1526  keys_to_keep = [
1527  'G4Version',
1528  'TruthStrategy',
1529  'SimBarcodeOffset',
1530  'RegenerationIncrement',
1531  'TRTRangeCut',
1532  'SimulationFlavour',
1533  'Simulator',
1534  'PhysicsList',
1535  'SimulatedDetectors',
1536  'IsDataOverlay',
1537  ]
1538  for item in list(meta_dict[filename]['/Simulation/Parameters']):
1539  if item not in keys_to_keep:
1540  meta_dict[filename]['/Simulation/Parameters'].pop(item)
1541 
1542  if '/Digitization/Parameters' in file_content:
1543  keys_to_keep = [
1544  'numberOfCollisions',
1545  'intraTrainBunchSpacing',
1546  'BeamIntensityPattern',
1547  'physicsList',
1548  'digiSteeringConf',
1549  'pileUp',
1550  'DigitizedDetectors',
1551  ]
1552  for item in list(meta_dict[filename]['/Digitization/Parameters']):
1553  if item not in keys_to_keep:
1554  meta_dict[filename]['/Digitization/Parameters'].pop(item)
1555 
1556  if 'CutBookkeepers' in file_content:
1557  keys_to_keep = [
1558  'currentCutCycle',
1559  'currentCutInputStream',
1560  ]
1561  for item in list(meta_dict[filename]['CutBookkeepers']):
1562  if item not in keys_to_keep:
1563  meta_dict[filename]['CutBookkeepers'].pop(item)
1564 
1565  if 'TruthMetaData' in file_content:
1566  keys_to_keep = [
1567  'mcChannelNumber',
1568  'weightNames',
1569  ]
1570  for item in list(meta_dict[filename]['TruthMetaData']):
1571  if item not in keys_to_keep:
1572  meta_dict[filename]['TruthMetaData'].pop(item)
1573 
1574  return meta_dict
1575 
1576 
1577 def promote_keys(meta_dict, mode):
1578  for filename, file_content in meta_dict.items():
1579  md = meta_dict[filename]
1580  for key in file_content:
1581  if key in md['metadata_items'] and regexEventStreamInfo.match(md['metadata_items'][key]):
1582  md.update(md[key])
1583 
1584  if 'eventTypes' in md and len(md['eventTypes']):
1585  et = md['eventTypes'][0]
1586  md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
1587  if 'mc_channel_number' in et:
1588  md['mc_channel_number'] = et.get('mc_channel_number', None)
1589  md['eventTypes'] = et['type']
1590 
1591  # For very old files
1592  if 'GeoAtlas' in et:
1593  md['GeoAtlas'] = et.get('GeoAtlas', None)
1594  if 'IOVDbGlobalTag' in et:
1595  md['IOVDbGlobalTag'] = et.get('IOVDbGlobalTag', None)
1596 
1597  if 'lumiBlockNumbers' in md[key]:
1598  md['lumiBlockNumbers'] = md[key]['lumiBlockNumbers']
1599 
1600  if 'processingTags' in md[key]:
1601  md['processingTags'] = md[key]['processingTags']
1602 
1603  meta_dict[filename].pop(key)
1604  break
1605 
1606  if not isGaudiEnv() and key in md['metadata_items'] and 'FileMetaData' in key:
1607  if 'beamType' in md[key]:
1608  md['beam_type'] = md[key]['beamType']
1609 
1610  if 'runNumbers' in md[key]:
1611  md['runNumbers'] = md[key]['runNumbers']
1612 
1613  if 'mcProcID' in md[key]:
1614  md['mc_channel_number'] = int(md[key]['mcProcID'])
1615 
1616  if 'mcCampaign' in md[key]:
1617  md['mc_campaign'] = md[key]['mcCampaign']
1618 
1619  if 'dataYear' in md[key]:
1620  md['data_year'] = int(md[key]['dataYear'])
1621 
1622  if 'lumiBlocks' in md[key]:
1623  md['lumiBlockNumbers'] = md[key]['lumiBlocks']
1624 
1625  if mode == 'peeker' and 'amiTag' in md[key]:
1626  md['AMITag'] = md[key]['amiTag']
1627 
1628  if 'beamEnergy' in md[key]:
1629  md['beam_energy'] = int(md[key]['beamEnergy'])
1630 
1631  if 'geometryVersion' in md[key]:
1632  md['GeoAtlas'] = md[key]['geometryVersion']
1633 
1634  # EventType checks
1635  md['eventTypes'] = []
1636  if mode == 'peeker' and 'simFlavour' in md[key]:
1637  md['SimulationFlavour'] = md[key]['simFlavour']
1638 
1639  if mode == 'peeker' and 'isDataOverlay' in md[key]:
1640  md['IsDataOverlay'] = md[key]['isDataOverlay']
1641 
1642  if 'dataType' in md[key]:
1643  md['processingTags'] = [md[key]['dataType']]
1644 
1645  if (
1646  ('simFlavour' in md[key] and ('FullG4' in md[key]['simFlavour'] or 'ATLFAST' in md[key]['simFlavour']))
1647  or 'DAOD_TRUTH' in md[key]['dataType']
1648  ):
1649  md['eventTypes'].append('IS_SIMULATION')
1650  else:
1651  md['eventTypes'].append('IS_DATA')
1652 
1653  if (
1654  'GeoAtlas' in md and 'ATLAS' in md['GeoAtlas']
1655  or 'DAOD_TRUTH' in md[key]['dataType']
1656  ):
1657  md['eventTypes'].append('IS_ATLAS')
1658  # this is probably safe to assume for all files used in AnalysisBase
1659  md['eventTypes'].append('IS_PHYSICS')
1660  else:
1661  md['eventTypes'].append('IS_TESTBEAM')
1662 
1663  if mode == 'peeker':
1664  if 'productionRelease' in md[key]:
1665  md['AtlasRelease'] = md[key]['productionRelease']
1666 
1667  if 'generatorsInfo' in md[key]:
1668  md['generators'] = md[key]['generatorsInfo']
1669 
1670  if mode == 'lite':
1671  meta_dict[filename].pop(key)
1672  break
1673 
1674  if '/TagInfo' in file_content:
1675  md.update(md['/TagInfo'])
1676  md.pop('/TagInfo')
1677 
1678  if '/Generation/Parameters' in file_content:
1679  md.update(md['/Generation/Parameters'])
1680  md.pop('/Generation/Parameters')
1681 
1682  if '/Simulation/Parameters' in file_content:
1683  md.update(md['/Simulation/Parameters'])
1684  md.pop('/Simulation/Parameters')
1685 
1686  if '/Digitization/Parameters' in file_content:
1687  md.update(md['/Digitization/Parameters'])
1688  md.pop('/Digitization/Parameters')
1689 
1690  if 'CutBookkeepers' in file_content:
1691  md.update(md['CutBookkeepers'])
1692  md.pop('CutBookkeepers')
1693 
1694  return meta_dict
1695 
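# Editor's sketch (not part of the original MetaReader.py): promote_keys() flattens the
# selected containers into the top level of each file's dictionary. The input below is
# hypothetical and only exercises the /TagInfo promotion.
example_meta = {
    'file.pool.root': {
        'metadata_items': {},
        '/TagInfo': {'beam_type': 'collisions', 'GeoAtlas': 'ATLAS-R3S-2021-03-02-00'},
    }
}
promote_keys(example_meta, mode='peeker')
# example_meta['file.pool.root'] -> {'metadata_items': {},
#                                    'beam_type': 'collisions',
#                                    'GeoAtlas': 'ATLAS-R3S-2021-03-02-00'}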
1696 
1697 def convert_itemList(metadata, layout):
1698  """
1699  This function will rearrange the itemList values to match the format of 'eventdata_items', 'eventdata_itemsList'
1700  or 'eventdata_itemsDic' generated with the legacy file peeker tool
1701  :param metadata: a dictionary obtained using read_metadata method.
1702  The mode for read_metadata must be 'peeker' or 'full'
1703  :param layout: the mode in which the data will be converted:
1704  * for 'eventdata_items' use: layout= None
1705  * for 'eventdata_itemsList' use: layout= '#join'
1706  * for 'eventdata_itemsDic' use: layout= 'dict'
1707  """
1708 
1709  # Find the itemsList:
1710  item_list = None
1711 
1712  if 'itemList' in metadata:
1713  item_list = metadata['itemList']
1714  else:
1715 
1716  current_key = None
1717 
1718  for key in metadata:
1719  if 'metadata_items' in metadata and key in metadata['metadata_items'] and metadata['metadata_items'][key] == 'EventStreamInfo_p3':
1720  current_key = key
1721  break
1722  if current_key is not None:
1723  item_list = metadata[current_key]['itemList']
1724 
1725  if item_list is not None:
1726 
1727  if layout is None:
1728  return item_list
1729 
1730  elif layout == '#join':
1731  return [k + '#' + v for k, v in item_list if k]
1732 
1733 
1734  elif layout == 'dict':
1735  from collections import defaultdict
1736  dic = defaultdict(list)
1737 
1738  for k, v in item_list:
1739  dic[k].append(v)
1740 
1741  return dict(dic)
1742 
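# Editor's sketch (not part of the original MetaReader.py): the three layouts of
# convert_itemList(), using a hypothetical metadata dictionary that already carries an
# 'itemList' entry.
example_meta = {'itemList': [('xAOD::ElectronContainer', 'Electrons'),
                             ('xAOD::MuonContainer', 'Muons')]}
convert_itemList(example_meta, None)     # -> [('xAOD::ElectronContainer', 'Electrons'), ...]
convert_itemList(example_meta, '#join')  # -> ['xAOD::ElectronContainer#Electrons', 'xAOD::MuonContainer#Muons']
convert_itemList(example_meta, 'dict')   # -> {'xAOD::ElectronContainer': ['Electrons'], 'xAOD::MuonContainer': ['Muons']}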
1743 
1744 def dataheader_nentries(infile):
1745  """Extract number of entries from DataHeader.
1746 
1747  infile ROOT TFile object or filename string
1748  return Number of entries as returned by DataHeader object in infile,
1749  None in absence of DataHeader object
1750  """
1751  import ROOT
1752  from PyUtils.PoolFile import PoolOpts
1753  if not isinstance(infile, ROOT.TFile):
1754  infile = ROOT.TFile.Open(infile)
1755 
1756  for name in {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}:
1757  obj = infile.Get(name)
1758  msg.debug(f"dataheader_nentries: {name=}, {obj=}, {type(obj)=}")
1759  if not obj:
1760  continue
1761  if isinstance(obj, ROOT.TTree):
1762  return obj.GetEntriesFast()
1763  else:
1764  # check early to avoid scary ROOT read errors
1765  if ROOT.gROOT.GetVersionInt() < 63100:
1766  raise RuntimeError("ROOT ver. 6.31/01 or greater needed to read RNTuple files")
1767  if isRNTuple(obj):
1768  try:
1769  return ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
1770  except AttributeError:
1771  return ROOT.RNTupleReader.Open(obj).GetNEntries()
1772  else:
1773  raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
1774 
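# Editor's usage sketch (not part of the original MetaReader.py); the file name below is
# hypothetical. dataheader_nentries() accepts either a path or an open ROOT.TFile and
# returns None when the input holds no DataHeader object.
#
#     n_events = dataheader_nentries('myAOD.pool.root')
#     if n_events is None:
#         msg.info('no DataHeader found in the input file')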
1775 def get_meta_filter(mode="lite", meta_key_filter=None) -> dict:
1776  """Return a dictionary of metadata filters based on the mode and
1777  optional meta_key_filter.
1778  """
1779 
1780  if meta_key_filter is None:
1781  meta_key_filter = []
1782 
1783  # create a container for the list of filters used for the lite version
1784  meta_filter = {}
1785 
1786  # set the filters for name
1787  if mode == 'lite':
1788  if isGaudiEnv():
1789  meta_filter = {
1790  '/TagInfo': 'IOVMetaDataContainer_p1',
1791  'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
1792  '*': 'EventStreamInfo_p*'
1793  }
1794  else:
1795  meta_filter = {
1796  'FileMetaData': '*',
1797  'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
1798  }
1799 
1800  # set the filters for name
1801  if mode == 'peeker':
1802  meta_filter.update({
1803  'TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>', # R2 trigger metadata format AOD (deprecated)
1804  'TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
1805  'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>', # R2 trigger metadata format ESD (deprecated)
1806  'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
1807  'TriggerMenuJson_HLT': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1808  'TriggerMenuJson_HLTAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1809  'TriggerMenuJson_HLTMonitoring': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1810  'TriggerMenuJson_HLTMonitoringAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1811  'TriggerMenuJson_HLTPS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1812  'TriggerMenuJson_HLTPSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1813  'TriggerMenuJson_L1': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1814  'TriggerMenuJson_L1Aux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1815  'TriggerMenuJson_L1PS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1816  'TriggerMenuJson_L1PSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1817  'CutBookkeepers': 'xAOD::CutBookkeeperContainer_v1',
1818  'CutBookkeepersAux.': 'xAOD::CutBookkeeperAuxContainer_v1',
1819  'FileMetaData': '*',
1820  'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
1821  'TruthMetaData': '*',
1822  'TruthMetaDataAux.': 'xAOD::TruthMetaDataAuxContainer_v1',
1823  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1824  'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1825  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1826  'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTMonitoringAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1827  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1828  'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTPSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1829  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1830  'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1Aux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1831  'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1832  'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1PSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1'
1833  })
1834 
1835  if isGaudiEnv():
1836  meta_filter.update({
1837  '/TagInfo': 'IOVMetaDataContainer_p1',
1838  'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
1839  '/Simulation/Parameters': 'IOVMetaDataContainer_p1',
1840  '/Digitization/Parameters': 'IOVMetaDataContainer_p1',
1841  '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1',
1842  '*': 'EventStreamInfo_p*'
1843  })
1844 
1845  if (mode == 'full' or mode == 'iov') and meta_key_filter:
1846  meta_filter = {f: '*' for f in meta_key_filter}
1847 
1848  return meta_filter
1849 
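# Editor's sketch (not part of the original MetaReader.py): in a Gaudi-enabled release
# the 'lite' filter reduces to the /TagInfo container plus any EventStreamInfo payload.
lite_filter = get_meta_filter(mode='lite')
# lite_filter (when isGaudiEnv() is True) ->
#     {'/TagInfo': 'IOVMetaDataContainer_p1',
#      'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
#      '*': 'EventStreamInfo_p*'}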
1850 def denormalize_metadata_types(metadata_dict):
1851  """
1852  Convert canonical/C++ STL types in the metadata_items dictionary back to their
1853  ROOT equivalents for backward compatibility.
1854  - 'float' => 'Float_t'
1855  - 'char' => 'Char_t'
1856  - 'std::string' => 'string'
1857  - 'xAOD::FileMetaData_v1' => 'FileMetaData'
1858  - 'xAOD::FileMetaDataAuxInfo_v1' => 'FileMetaDataAux'
1859  (add more as needed)
1860  """
1861  type_map = {
1862  "float": "Float_t",
1863  "char": "Char_t",
1864  "std::string": "string",
1865  "std::uint32_t": "UInt_t",
1866  "xAOD::FileMetaData_v1": "FileMetaData",
1867  }
1868  denormalized = {}
1869  for k, v in metadata_dict.items():
1870  new_v = v
1871  for old, new in type_map.items():
1872  if new_v == old:
1873  new_v = new
1874  elif new_v.endswith("." + old):
1875  new_v = new_v.rsplit(".", 1)[0] + "." + new
1876  denormalized[k] = new_v
1877  return denormalized
1878 
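# Editor's sketch (not part of the original MetaReader.py): mapping canonical type names
# back to their ROOT spellings. The item names below are hypothetical.
denormalize_metadata_types({'SomeAux.someVar': 'float',
                            'FileMetaData': 'xAOD::FileMetaData_v1'})
# -> {'SomeAux.someVar': 'Float_t', 'FileMetaData': 'FileMetaData'}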
1879 
1880 def should_keep_meta(normalizedName, typeName, meta_filter):
1881  """
1882  Helper function to determine if metadata should be kept based on meta_filter.
1883  """
1884  if len(meta_filter) == 0:
1885  return True
1886 
1887  for filter_key, filter_class in meta_filter.items():
1888  if (
1889  filter_key.replace("/", "_") in normalizedName.replace("/", "_")
1890  or filter_key == "*"
1891  ) and fnmatchcase(typeName, filter_class):
1892  if "CutBookkeepers" in filter_key:
1893  keep = filter_key == normalizedName
1894  if keep:
1895  return True
1896  else:
1897  return True
1898  return False
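# Editor's sketch (not part of the original MetaReader.py): applying the 'lite' filter
# (Gaudi environment assumed) to two candidate metadata objects.
lite_filter = get_meta_filter(mode='lite')
should_keep_meta('/TagInfo', 'IOVMetaDataContainer_p1', lite_filter)
# -> True (name and persistent class both match the filter)
should_keep_meta('TriggerMenuJson_HLT', 'DataVector<xAOD::TriggerMenuJson_v1>', lite_filter)
# -> False (this container is only kept by the 'peeker' filter)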