ATLAS Offline Software
MetaReader.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 import os
4 import re
5 from fnmatch import fnmatchcase
6 from AthenaCommon.Logging import logging
7 from AthenaConfiguration.AthConfigFlags import isGaudiEnv
8 from ROOT import gSystem
9 
# Module-level logger shared by every function in this file.
msg = logging.getLogger('MetaReader')

# The regular expressions below are compiled once at import time so that the
# per-branch loops in read_metadata() and _convert_value() do not recompile them.
# Most of them match a persistent/xAOD class name with an optional
# schema-version suffix ("_pN" for POOL persistent classes, "_vN" for xAOD classes).
regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')
regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')
regexXAODCutBookkeeperContainer = re.compile(r'^xAOD::CutBookkeeperContainer(_v\d+)?$')
regexXAODCutBookkeeperContainerAux = re.compile(r'^xAOD::CutBookkeeperAuxContainer(_v\d+)?$')
regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')
regexXAODFileMetaData = re.compile(r'^xAOD::FileMetaData(_v\d+)?$')
regexXAODFileMetaDataAux = re.compile(r'^xAOD::FileMetaDataAuxInfo(_v\d+)?$')
# Matched against branch *names* (not class names): dynamic aux variables of FileMetaData,
# optionally followed by ".<variableName>".
regexXAODFileMetaDataAuxDyn = re.compile(r'^(xAOD::)?FileMetaData.*AuxDyn(\.[a-zA-Z0-9]+)?$')
regexXAODTriggerMenu = re.compile(r'^DataVector<xAOD::TriggerMenu(_v\d+)?>$')  # Run 2
regexXAODTriggerMenuAux = re.compile(r'^xAOD::TriggerMenuAuxContainer(_v\d+)?$')  # Run 2
regexXAODTriggerMenuJson = re.compile(r'^DataVector<xAOD::TriggerMenuJson(_v\d+)?>$')  # Run 3
regexXAODTriggerMenuJsonAux = re.compile(r'^xAOD::TriggerMenuJsonAuxContainer(_v\d+)?$')  # Run 3
regexXAODTruthMetaData = re.compile(r'^DataVector<xAOD::TruthMetaData(_v\d+)?>$')
regexXAODTruthMetaDataAux = re.compile(r'^xAOD::TruthMetaDataAuxContainer(_v\d+)?$')
# Splits a C++ type name into its (possibly namespaced) base name and an optional template part.
regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
# Previous, stricter form kept for reference (only "_pN" suffixes, no versioned namespaces):
# regex_persistent_class = re.compile(r'^([a-zA-Z]+_p\d+::)*[a-zA-Z]+_p\d+$')
regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
# Names of the form "<scheme>:..." that contain ".RAW." / ".DRAW." or end in ".data";
# read_metadata() uses it to classify files that are not local as bytestream.
regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')
# Leading "<scheme>:" of a URI; used to tell remote names from local paths.
regex_URI_scheme = re.compile(r'^([A-Za-z0-9\+\.\-]+)\:')
33 
# Top-level metadata keys retained for the 'lite' output.
lite_primary_keys_to_keep = [
    'lumiBlockNumbers', 'runNumbers', 'mc_event_number', 'mc_channel_number',
    'eventTypes', 'processingTags', 'itemList']
# Keys of the '/TagInfo' folder retained for the 'lite' output.
lite_TagInfo_keys_to_keep = [
    'beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag',
    'AODFixVersion', 'project_name', 'mc_campaign']

# Metadata keys that carry trigger configuration: the derived summaries, the xAOD
# containers (AOD naming), the IOV folders and the class-prefixed container names (ESD naming).
trigger_keys = [
    'TriggerConfigInfo',
    'TriggerMenu', 'TriggerMenuJson_BG', 'TriggerMenuJson_HLT', 'TriggerMenuJson_HLTMonitoring', 'TriggerMenuJson_HLTPS', 'TriggerMenuJson_L1', 'TriggerMenuJson_L1PS',
    '/TRIGGER/HLT/Groups', '/TRIGGER/HLT/HltConfigKeys', '/TRIGGER/HLT/Menu', '/TRIGGER/HLT/PrescaleKey', '/TRIGGER/HLT/Prescales',
    '/TRIGGER/LVL1/ItemDef', '/TRIGGER/LVL1/Lvl1ConfigKey', '/TRIGGER/LVL1/Menu', '/TRIGGER/LVL1/Prescales', '/TRIGGER/LVL1/Thresholds',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenu', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS',
]
50 
51 
def read_metadata(filenames, file_type = None, mode = 'lite', promote = None, meta_key_filter = None,
                  unique_tag_info_values = True, ignoreNonExistingLocalFiles=False):
    """
    Read the in-file metadata of one or more POOL or bytestream (BS) files.

    :param filenames: a single file name (str) or a list of file names.
    :param file_type: None to detect the type per file, or 'POOL' / 'BS' to force it.
        Local files are detected from their first four bytes (b'root' means POOL, anything
        else BS); other names are classified with regex_BS_files. 'file_type' and
        'file_size' are only stored in the result when the type is detected.
    :param mode: one of 'tiny', 'lite', 'peeker', 'full' or 'iov'. 'tiny' only stores the
        file-level information (GUID, compression settings, auto flush, number of entries);
        the other modes additionally read the metadata containers. 'full' and 'iov' are
        rejected when isGaudiEnv() is false.
    :param promote: whether promote_keys() is applied to the result. None (default) means
        True for the 'lite' and 'peeker' modes and False otherwise.
    :param meta_key_filter: list of metadata keys to read; only accepted in the 'full'
        and 'iov' modes. When given, only the matching containers are returned.
    :param unique_tag_info_values: replace every non-empty list stored in '/TagInfo' by its
        first element (workaround for ATEAM-560). Switched off automatically in 'iov' mode.
    :param ignoreNonExistingLocalFiles: when true, local files that cannot be accessed are
        skipped with a warning; their entry in the result stays an empty dictionary.
    :return: a dictionary mapping each file name to its metadata dictionary, or None as
        soon as a file with an unsupported type is met.
    :raises NameError: for an invalid 'file_type' or 'mode', for a mode that is not
        available in the current environment, or for a 'meta_key_filter' used outside
        the 'full' / 'iov' modes.
    :raises ValueError: if a '/TagInfo' entry holds several values that cannot be
        reconciled while 'unique_tag_info_values' is active.

    Side effect: the module-level global ``_gbl_mode`` is set to ``mode``.
    """

    # make the mode available in the _convert methods
    global _gbl_mode
    _gbl_mode = mode

    from RootUtils import PyROOTFixes  # noqa F401

    # Check if the input is a file or a list of files.
    if isinstance(filenames, str):
        filenames = [filenames]

    # Check if file_type is an allowed value
    if file_type is not None:
        if file_type not in ('POOL', 'BS'):
            raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
        else:
            msg.info('Forced file_type: {0}'.format(file_type))

    # Check the value of mode parameter
    if mode not in ('tiny', 'lite', 'full', 'peeker', 'iov'):
        raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker", "iov" or "full"')

    if meta_key_filter is None:
        meta_key_filter = []

    # Disable 'full' and 'iov' in non-Gaudi environments
    if not isGaudiEnv():
        if mode in ('full', 'iov'):
            raise NameError('The following modes are not available in AnalysisBase: "iov" and "full"')

    msg.info('Current mode used: {0}'.format(mode))
    msg.info('Current filenames: {0}'.format(filenames))

    if mode != 'full' and mode !='iov' and len(meta_key_filter) > 0:
        raise NameError('It is possible to use the meta_key_filter option only for full mode')
    if meta_key_filter:
        msg.info('Filter used: {0}'.format(meta_key_filter))

    # create the storage object for metadata.
    meta_dict = {}

    # ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
    for filename in filenames:
        meta_dict[filename] = {}
        current_file_type = None
        # Determine the file_type of the input and store this information into meta_dict
        if not file_type:
            if os.path.isfile(filename):

                # TSystem::AccessPathName returns True when the path is NOT accessible
                if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename):  # Attention, bizarre convention of return value!!
                    msg.warn('Ignoring not accessible file: {}'.format(filename))
                    continue

                # ROOT files start with the four magic bytes "root"
                with open(filename, 'rb') as binary_file:
                    magic_file = binary_file.read(4)

                    if magic_file == 'root' or magic_file == b'root':
                        current_file_type = 'POOL'
                        meta_dict[filename]['file_type'] = 'POOL'

                    else:
                        current_file_type = 'BS'
                        meta_dict[filename]['file_type'] = 'BS'

                # add information about the file_size of the input filename
                meta_dict[filename]['file_size'] = os.path.getsize(filename)

            # determine the file type for the remote input files
            else:
                if regex_BS_files.match(filename):
                    current_file_type = 'BS'
                    meta_dict[filename]['file_type'] = 'BS'
                else:
                    current_file_type = 'POOL'
                    meta_dict[filename]['file_type'] = 'POOL'

                # add information about the file_size of the input filename
                meta_dict[filename]['file_size'] = None  # None -> we can't read the file size for a remote file

        else:
            current_file_type = file_type

        # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
        if current_file_type == 'POOL':

            if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename):  # Attention, bizarre convention of return value!!
                msg.warn('Ignoring not accessible file: {}'.format(filename))
                continue

            import ROOT
            # open the file using ROOT.TFile
            current_file = ROOT.TFile.Open( _get_pfn(filename) )

            # get auto flush setting from the main EventData TTree
            from PyUtils.PoolFile import PoolOpts
            collectionTree = current_file.Get(PoolOpts.TTreeNames.EventData)
            if isinstance(collectionTree, ROOT.TTree):
                meta_dict[filename]['auto_flush'] = collectionTree.GetAutoFlush()

            # read and add the 'GUID' value
            meta_dict[filename]['file_guid'] = _read_guid(filename)

            # read and add compression level and algorithm
            meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
            meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()

            # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
            if mode != 'tiny':
                # selecting from all tree the only one which contains metadata, respectively "MetaData"
                metadata_tree = current_file.Get('MetaData')
                # read all list of branches stored in "MetaData" tree
                metadata_branches = metadata_tree.GetListOfBranches()
                nr_of_branches = metadata_branches.GetEntriesFast()

                # object to store the names of metadata containers and their corresponding class name.
                meta_dict[filename]['metadata_items'] = {}

                # create a container for the list of filters used for the lite version:
                # maps a branch-name fragment (or '*') to an fnmatch pattern for the class name
                meta_filter = {}

                # set the filters for name
                if mode == 'lite':
                    if isGaudiEnv():
                        meta_filter = {
                            '/TagInfo': 'IOVMetaDataContainer_p1',
                            'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
                            '*': 'EventStreamInfo_p*'
                        }
                    else:
                        meta_filter = {
                            'FileMetaData': '*',
                            'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
                        }

                # set the filters for name
                if mode == 'peeker':
                    meta_filter.update({
                        'TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>',  # R2 trigger metadata format AOD (deprecated)
                        'TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
                        'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>',  # R2 trigger metadata format ESD (deprecated)
                        'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
                        'TriggerMenuJson_HLT': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format AOD
                        'TriggerMenuJson_HLTAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'TriggerMenuJson_HLTMonitoring': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format AOD
                        'TriggerMenuJson_HLTMonitoringAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'TriggerMenuJson_HLTPS': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format AOD
                        'TriggerMenuJson_HLTPSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'TriggerMenuJson_L1': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format AOD
                        'TriggerMenuJson_L1Aux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'TriggerMenuJson_L1PS': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format AOD
                        'TriggerMenuJson_L1PSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'CutBookkeepers': 'xAOD::CutBookkeeperContainer_v1',
                        'CutBookkeepersAux.': 'xAOD::CutBookkeeperAuxContainer_v1',
                        'FileMetaData': '*',
                        'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
                        'TruthMetaData': '*',
                        'TruthMetaDataAux.': 'xAOD::TruthMetaDataAuxContainer_v1',
                        'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format ESD
                        'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format ESD
                        'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTMonitoringAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format ESD
                        'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTPSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format ESD
                        'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1Aux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
                        'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS': 'DataVector<xAOD::TriggerMenuJson_v1>',  # R3 trigger metadata format ESD
                        'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1PSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1'
                    })

                    if isGaudiEnv():
                        meta_filter.update({
                            '/TagInfo': 'IOVMetaDataContainer_p1',
                            'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
                            '/Simulation/Parameters': 'IOVMetaDataContainer_p1',
                            '/Digitization/Parameters': 'IOVMetaDataContainer_p1',
                            '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1',
                            '*': 'EventStreamInfo_p*'
                        })

                if (mode == 'full' or mode == 'iov') and meta_key_filter:
                    meta_filter = {f: '*' for f in meta_key_filter}
                # store all persistent classes for metadata container existing in a POOL/ROOT file.
                persistent_instances = {}
                dynamic_fmd_items = {}

                # Protect non-Gaudi environments from meta-data classes it doesn't know about
                if not isGaudiEnv():
                    metadata_tree.SetBranchStatus("*", False)

                for i in range(0, nr_of_branches):
                    branch = metadata_branches.At(i)
                    name = branch.GetName()
                    if name == 'index_ref':
                        # skip the index branch
                        continue

                    class_name = branch.GetClassName()

                    # IOV folders are stored with '/' replaced by '_': restore the folder name
                    if regexIOVMetaDataContainer.match(class_name):
                        name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')

                    if regexIOVMetaDataContainer.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
                    elif regexByteStreamMetadataContainer.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
                    elif regexEventStreamInfo.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
                    elif regexXAODFileMetaData.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'FileMetaData'
                    elif regexXAODTruthMetaData.match(class_name):
                        meta_dict[filename]['metadata_items'][name] = 'TruthMetaData'
                    else:
                        type_name = class_name
                        if not type_name:
                            # branches without a class: fall back on the type of the first leaf
                            try:
                                type_name = branch.GetListOfLeaves()[0].GetTypeName()
                            except IndexError:
                                pass
                        meta_dict[filename]['metadata_items'][name] = type_name

                    if len(meta_filter) > 0:
                        keep = False
                        for filter_key, filter_class in meta_filter.items():
                            if (filter_key.replace('/', '_') in name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
                                if 'CutBookkeepers' in filter_key:
                                    # CutBookkeepers filters require an exact name match
                                    keep = filter_key == name
                                    if keep:
                                        break
                                else:
                                    keep = True
                                    break

                        if not keep:
                            continue
                    else:
                        # CutBookkeepers should always be filtered:
                        if 'CutBookkeepers' in name and name not in ['CutBookkeepers', 'CutBookkeepersAux.']:
                            continue

                    if not isGaudiEnv():
                        metadata_tree.SetBranchStatus(f"{name}*", True)

                    # assign the corresponding persistent class based of the name of the metadata container
                    if regexEventStreamInfo.match(class_name):
                        if class_name.endswith('_p1'):
                            persistent_instances[name] = ROOT.EventStreamInfo_p1()
                        elif class_name.endswith('_p2'):
                            persistent_instances[name] = ROOT.EventStreamInfo_p2()
                        else:
                            persistent_instances[name] = ROOT.EventStreamInfo_p3()
                    elif regexIOVMetaDataContainer.match(class_name):
                        persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
                    elif regexXAODEventFormat.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
                    elif regexXAODTriggerMenu.match(class_name) and _check_project() not in ['AthGeneration']:
                        persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
                    elif regexXAODTriggerMenuAux.match(class_name) and _check_project() not in ['AthGeneration']:
                        persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()
                    elif regexXAODTriggerMenuJson.match(class_name) and _check_project() not in ['AthGeneration']:
                        persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonContainer_v1()
                    elif regexXAODTriggerMenuJsonAux.match(class_name) and _check_project() not in ['AthGeneration']:
                        persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonAuxContainer_v1()
                    elif regexXAODCutBookkeeperContainer.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.CutBookkeeperContainer_v1()
                    elif regexXAODCutBookkeeperContainerAux.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.CutBookkeeperAuxContainer_v1()
                    elif regexXAODFileMetaData.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.FileMetaData_v1()
                    elif regexXAODFileMetaDataAux.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.FileMetaDataAuxInfo_v1()
                    elif regexXAODTruthMetaData.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.TruthMetaDataContainer_v1()
                    elif regexXAODTruthMetaDataAux.match(class_name):
                        persistent_instances[name] = ROOT.xAOD.TruthMetaDataAuxContainer_v1()

                    if name in persistent_instances:
                        branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))

                    # This creates a dict to store the dynamic attributes of the xAOD::FileMetaData
                    dynamicFMD = regexXAODFileMetaDataAuxDyn.match(name)
                    if dynamicFMD:
                        dynamicName = dynamicFMD.group().split('.')[-1]
                        dynamicType = regex_cppname.match(class_name)
                        if dynamicType:
                            # this should be a string
                            dynamic_fmd_items[dynamicName] = ROOT.std.string()
                            branch.SetAddress(ROOT.AddressOf(dynamic_fmd_items[dynamicName]))
                        else:
                            # no class name: the value is read from the tree after GetEntry()
                            dynamic_fmd_items[dynamicName] = None

                # fill all the objects whose address was set above
                metadata_tree.GetEntry(0)

                # This loads the dynamic attributes of the xAOD::FileMetaData from the TTree
                for key in dynamic_fmd_items:
                    if dynamic_fmd_items[key] is None:
                        try:
                            if key.startswith("is"):
                                # this is probably a boolean
                                dynamic_fmd_items[key] = getattr(metadata_tree, key) != '\x00'
                            else:
                                # this should be a float
                                dynamic_fmd_items[key] = getattr(metadata_tree, key)
                        except AttributeError:
                            # should not happen, but just ignore missing attributes
                            pass
                    else:
                        # convert ROOT.std.string objects to python equivalent
                        dynamic_fmd_items[key] = str(dynamic_fmd_items[key])

                # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
                if meta_key_filter:
                    meta_dict[filename] = {}

                # read the metadata
                for name, content in persistent_instances.items():
                    key = name
                    if hasattr(content, 'm_folderName'):
                        key = content.m_folderName

                    # Some transition AODs contain both the Run2 and Run3 metadata formats. We only wish to read the Run3 format if such a file is encountered.
                    has_r3_trig_meta = ('TriggerMenuJson_HLT' in persistent_instances or 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT' in persistent_instances)
                    # pair every interface container with its auxiliary store; the aux
                    # stores themselves are skipped because they are read through the interface
                    aux = None
                    if key.startswith('TriggerMenuJson_') and not key.endswith('Aux.'):  # interface container for the menu (AOD)
                        aux = persistent_instances[key+'Aux.']
                    elif key.startswith('DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_') and not key.endswith('Aux.'):  # interface container for the menu (ESD)
                        menuPart = key.split('_')[-1]
                        aux = persistent_instances['xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_'+menuPart+'Aux.']
                    elif key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta:  # AOD case (legacy support, HLT and L1 menus)
                        aux = persistent_instances['TriggerMenuAux.']
                    elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta:  # ESD case (legacy support, HLT and L1 menus)
                        aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
                    elif (key == 'CutBookkeepers'
                          and 'CutBookkeepersAux.' in persistent_instances):
                        aux = persistent_instances['CutBookkeepersAux.']
                    elif key == 'CutBookkeepersAux.':
                        continue  # Extracted using the interface object
                    elif (key == 'FileMetaData'
                          and 'FileMetaDataAux.' in persistent_instances):
                        aux = persistent_instances['FileMetaDataAux.']
                    elif (key == 'xAOD::FileMetaData_v1_FileMetaData'
                          and 'xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.' in persistent_instances):
                        aux = persistent_instances['xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.']
                    elif (key == 'TruthMetaData'
                          and 'TruthMetaDataAux.' in persistent_instances):
                        aux = persistent_instances['TruthMetaDataAux.']
                    elif key == 'TruthMetaDataAux.':
                        continue  # Extracted using the interface object
                    elif 'Menu' in key and key.endswith('Aux.'):
                        continue  # Extracted using the interface object

                    return_obj = _convert_value(content, aux)

                    if 'TriggerMenuJson' in key or ('TriggerMenu' in key and not has_r3_trig_meta):
                        if 'RAWTriggerMenuJson' in return_obj:
                            meta_dict[filename][key] = return_obj['RAWTriggerMenuJson']
                            del return_obj['RAWTriggerMenuJson']
                        if 'TriggerConfigInfo' not in meta_dict[filename]:
                            meta_dict[filename]['TriggerConfigInfo'] = {}
                        if 'dbkey' in return_obj:
                            meta_dict[filename]['TriggerConfigInfo'][key.split('_')[-1]] = {
                                'key' : return_obj['dbkey'],
                                'name': return_obj['name']
                            }
                            del return_obj['dbkey']
                            del return_obj['name']
                        if 'TriggerMenu' not in meta_dict[filename]:
                            meta_dict[filename]['TriggerMenu'] = {}
                        meta_dict[filename]['TriggerMenu'].update(return_obj)
                    elif "FileMetaData" in key:
                        # the dynamic attributes seed the entry, the static ones are merged in
                        if "FileMetaData" not in meta_dict[filename]:
                            meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
                        meta_dict[filename]["FileMetaData"].update(return_obj)
                    else:
                        meta_dict[filename][key] = return_obj

            try:
                # get the number of events from EventStreamInfo
                esi_dict = next(key for key, value in meta_dict[filename].items()
                                if isinstance(value, dict) and "numberOfEvents" in value and
                                meta_dict[filename]["metadata_items"][key] == "EventStreamInfo")
                msg.debug(f"{esi_dict=}")
                meta_dict[filename]["nentries"] = meta_dict[filename][esi_dict]["numberOfEvents"]
            except StopIteration as err:
                msg.debug(f"Caught {err=}, {type(err)=}, falling back on opening the DataHeader"
                          " Container to read the number of entries")
                meta_dict[filename]['nentries'] = dataheader_nentries(current_file)
            msg.debug(f"{meta_dict[filename]['nentries']=}")

            if unique_tag_info_values and mode=='iov':
                unique_tag_info_values = False
                msg.info('disabling "unique_tag_info_values" option for "iov" mode')

            # This is a required workaround which will temporarily be fixing ATEAM-560 originated from ATEAM-531
            # ATEAM-560: https://its.cern.ch/jira/browse/ATEAM-560
            # ATEAM-531: https://its.cern.ch/jira/browse/ATEAM-531
            # This changes will remove all duplicates values presented in some files due
            # to the improper merging of two IOVMetaDataContainers.
            if unique_tag_info_values:
                msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
                         'This is a workaround to remove all duplicate values from "/TagInfo" key')
                if '/TagInfo' in meta_dict[filename]:
                    for key, value in meta_dict[filename]['/TagInfo'].items():
                        if isinstance(value, list) and value:
                            if len(unique_values := set(value)) > 1:
                                msg.warn(
                                    f"Found multiple values for {key}: {value}. "
                                    "Looking for possible duplicates."
                                )
                                maybe_ok = False
                                if key == "AMITag":
                                    # curate duplicates like: ['s3681_q453', 's3681_q453_'] or ["s3681_q453", "q453_s3681"]
                                    unique_amitags = set()
                                    for amitags in unique_values:
                                        # sort the tags so that the canonical form does not
                                        # depend on the iteration order of the set
                                        unique_amitags.add(
                                            "_".join(sorted({tag for tag in amitags.split("_") if tag}))
                                        )
                                    if len(unique_amitags) == 1:
                                        maybe_ok = True
                                elif key == "beam_energy":
                                    # handle duplicates like: ['6500000', '6500000.0'] or [3, "3"]
                                    unique_energies = set()
                                    for energy in unique_values:
                                        try:
                                            energy = int(energy)
                                        except ValueError:
                                            try:
                                                energy = float(energy)
                                            except ValueError:
                                                pass
                                        unique_energies.add(energy)
                                    if len(unique_energies) == 1:
                                        maybe_ok = True
                                elif key in ["AtlasRelease", "IOVDbGlobalTag", "AODFixVersion"]:
                                    maybe_ok = True
                                if maybe_ok:
                                    msg.warn(
                                        f"Multiple values for {key} may mean the same, or "
                                        "the input file was produced in multi-step job. "
                                        f"Ignoring all but the first entry: {key} = {value[0]}"
                                    )
                                else:
                                    raise ValueError(
                                        f"{key} from /TagInfo contains more than 1 unique value: {value}"
                                    )

                            meta_dict[filename]['/TagInfo'][key] = value[0]

            if promote is None:
                promote = mode == 'lite' or mode == 'peeker'

            # NOTE(review): the three helpers below receive the whole meta_dict on every
            # iteration of the file loop, so the entries of files processed earlier go
            # through them again -- confirm that they are idempotent.
            # Filter the data and create a prettier output for the 'lite' mode
            if mode == 'lite':
                meta_dict = make_lite(meta_dict)

            if mode == 'peeker':
                meta_dict = make_peeker(meta_dict)

            if promote:
                meta_dict = promote_keys(meta_dict, mode)

            # If AnalysisBase the itemList must be grabbed another way
            if not isGaudiEnv():
                if isinstance(collectionTree, ROOT.TTree):
                    meta_dict[filename]['itemList'] = [ (b.GetClassName(), b.GetName()) for b in collectionTree.GetListOfBranches() ]

        # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
        elif current_file_type == 'BS':

            if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and not os.path.isfile(filename):
                msg.warn('Ignoring not accessible file: {}'.format(filename))
                continue

            import eformat

            # store the number of entries
            bs = eformat.istream(filename)
            meta_dict[filename]['nentries'] = bs.total_events

            # store the 'guid' value
            data_reader = eformat.EventStorage.pickDataReader(filename)
            assert data_reader, 'problem picking a data reader for file [%s]' % filename

            # set auto flush equivalent, which for BS is always 1
            meta_dict[filename]['auto_flush'] = 1

            if hasattr(data_reader, 'GUID'):
                meta_dict[filename]['file_guid'] = data_reader.GUID()

            # compression level and algorithm, for BS always ZLIB
            meta_dict[filename]['file_comp_alg'] = 1
            meta_dict[filename]['file_comp_level'] = 1

            # for every mode but 'tiny', grab the metadata stored in the file header
            # ------------------------------------------------------------------------------------------------------#
            if mode != "tiny":
                bs_metadata = {}

                for md in data_reader.freeMetaDataStrings():
                    if md.startswith('Event type:'):
                        k = 'eventTypes'
                        v = []
                        if 'is sim' in md:
                            v.append('IS_SIMULATION')
                        else:
                            v.append('IS_DATA')

                        if 'is atlas' in md:
                            v.append('IS_ATLAS')
                        else:
                            v.append('IS_TESTBEAM')

                        if 'is physics' in md:
                            v.append('IS_PHYSICS')
                        else:
                            v.append('IS_CALIBRATION')

                        bs_metadata[k] = tuple(v)

                    elif md.startswith('GeoAtlas:'):
                        k = 'geometry'
                        v = md.split('GeoAtlas:')[1].strip()
                        bs_metadata[k] = v

                    elif md.startswith('IOVDbGlobalTag:'):
                        k = 'conditions_tag'
                        v = md.split('IOVDbGlobalTag:')[1].strip()
                        bs_metadata[k] = v

                    elif '=' in md:
                        # split on the first '=' only, so that values which themselves
                        # contain '=' do not break the unpacking
                        k, v = md.split('=', 1)
                        bs_metadata[k] = v

                bs_metadata['detectorMask'] = data_reader.detectorMask()
                bs_metadata['runNumbers'] = data_reader.runNumber()
                bs_metadata['lumiBlockNumbers'] = data_reader.lumiblockNumber()
                bs_metadata['projectTag'] = data_reader.projectTag()
                bs_metadata['stream'] = data_reader.stream()
                #bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
                beamTypeNbr= data_reader.beamType()
                #According to info from Rainer and Guiseppe the beam type is
                #O: no beam
                #1: protons
                #2: ions
                if (beamTypeNbr==0):
                    bs_metadata['beamType'] = 'cosmics'
                elif (beamTypeNbr==1 or beamTypeNbr==2):
                    bs_metadata['beamType'] = 'collisions'
                else:
                    bs_metadata['beamType'] = 'unknown'

                bs_metadata['beamEnergy'] = data_reader.beamEnergy()

                meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
                meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
                meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
                meta_dict[filename]['project_name'] = bs_metadata.get('projectTag', None)

                # Promote up one level
                meta_dict[filename]['detectorMask'] = [bs_metadata.get('detectorMask', None)]
                meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
                meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
                meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
                meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
                meta_dict[filename]['stream'] = bs_metadata.get('stream', None)

                if not data_reader.good():
                    # event-less file...
                    meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
                    meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))

                msg.debug(f"{meta_dict[filename]=}")
                msg.debug(f"{len(bs)=}")
                if len(bs):
                    evt = bs[0]
                    try:
                        evt.check()
                        meta_dict[filename]['processingTags'] = [tag.name for tag in evt.stream_tag()]
                        meta_dict[filename]['evt_number'] = [evt.global_id()]
                        meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
                        # ATLASRECTS-7126: If there is no valid lumiblock information
                        # in the ByteStream header, get the info from the first event.
                        if meta_dict[filename]['lumiBlockNumbers'] == [0]:
                            msg.debug('Taking the luminosity block info from the first event (%i)', evt.lumi_block())
                            meta_dict[filename]['lumiBlockNumbers'] = [evt.lumi_block()]
                        # ATLASRECTS-7126: If there is no valid run number information
                        # in the ByteStream header, get the info from the first event.
                        if meta_dict[filename]['runNumbers'] == [0]:
                            msg.debug('Taking the run number info from the first event (%i)', evt.run_no())
                            meta_dict[filename]['runNumbers'] = [evt.run_no()]
                    except RuntimeError as err:
                        msg.error("Issue while reading the first event of BS file %r: %r", filename, err)
                    else:
                        msg.debug(f"{meta_dict[filename]=}")
                else:
                    msg.warn(f"Event-less BS {filename=}, will not read metadata information from the first event")

                # fix for ATEAM-122
                if len(bs_metadata.get('eventTypes', '')) == 0:  # see: ATMETADATA-6
                    evt_type = ['IS_DATA', 'IS_ATLAS']
                    if bs_metadata.get('stream', '').startswith('physics_'):
                        evt_type.append('IS_PHYSICS')
                    elif bs_metadata.get('stream', '').startswith('calibration_'):
                        evt_type.append('IS_CALIBRATION')
                    elif bs_metadata.get('projectTag', '').endswith('_calib'):
                        evt_type.append('IS_CALIBRATION')
                    else:
                        evt_type.append('Unknown')

                    meta_dict[filename]['eventTypes'] = evt_type

                if mode == 'full':
                    meta_dict[filename]['bs_metadata'] = bs_metadata

        # ------ Throw an error if the user provide other file types -------------------------------------------------#
        else:
            msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
            return None

    return meta_dict
680 
681 
def _check_project():
    """
    Return the name of the software project the process runs in.

    The project is recognised from the environment: 'AthSimulation' if the
    variable AthSimulation_DIR is set, otherwise 'AthGeneration' if
    AthGeneration_DIR is set, otherwise 'Athena'.
    """
    import os
    if 'AthSimulation_DIR' in os.environ:
        return 'AthSimulation'
    if 'AthGeneration_DIR' in os.environ:
        return 'AthGeneration'
    return 'Athena'
689 
690 
def _get_pfn(filename):
    """
    Extract the actual filename if LFN or PFN notation is used.

    :param filename: a plain file name, or a name prefixed with 'PFN:' or 'LFN:'.
    :return: the name without its 'PFN:' prefix; for an 'LFN:' name, the single
        physical file name printed by the external ``FClistPFN -l`` command; in all
        other cases (including an LFN lookup that does not yield exactly one name)
        the input unchanged.
    :raises subprocess.CalledProcessError: if ``FClistPFN`` exits with a non-zero status.
    """
    if filename.startswith('PFN:'):
        return filename[4:]
    if filename.startswith('LFN:'):
        import os
        import subprocess
        # Set in this process' environment (and therefore inherited by the child
        # process) to reduce the POOL message output.
        os.environ['POOL_OUTMSG_LEVEL'] = 'Error'
        output = subprocess.check_output(['FClistPFN', '-l', filename[4:]], text=True)
        # Count the non-empty lines instead of relying on exactly one trailing newline.
        pfns = [line.strip() for line in output.splitlines() if line.strip()]
        if len(pfns) == 1:
            return pfns[0]
        msg.error( 'FClistPFN({0}) returned unexpected number of lines:'.format(filename) )
        msg.error( output )
    return filename
707 
708 
def _read_guid(filename):
    """
    Extract the "guid" (Globally Unique Identifier in POOL files and Grid catalogs) value from a POOL file.

    The ``##Params`` tree is scanned for ``db_string`` values of the form
    ``[NAME=...][VALUE=...]``; the value of the last entry named ``FID`` is returned.

    :param filename: the input file (``PFN:``/``LFN:`` prefixes are resolved through _get_pfn)
    :return: the guid value, or None when no ``FID`` entry is present
    :raises NotImplementedError: when ``##Params`` is not a TTree
    """
    import ROOT
    root_file = ROOT.TFile.Open( _get_pfn(filename) )
    try:
        params = root_file.Get('##Params')
        if not isinstance(params, ROOT.TTree):
            raise NotImplementedError(f"Cannot extract ##Params from object of type {type(params)!r}")

        regex = re.compile(r'\[NAME=(\w+)\]\[VALUE=(.*)\]', re.ASCII)
        fid = None

        for entry in params:
            param = entry.GetLeaf('db_string').GetValueString()

            result = regex.match(param)
            if result and result.group(1) == 'FID':
                # don't exit yet, it's the last FID entry that counts
                fid = result.group(2)

        return fid
    finally:
        # The file was opened only for this lookup: release it again so that
        # repeated calls do not accumulate open file handles.
        if root_file:
            root_file.Close()
733 
734 
736  result = {}
737 
738  for meth in dir(obj):
739  if not meth.startswith('_'):
740  if meth.startswith('m_'):
741 
742  field_name = str(meth)[2:]
743  field_value = getattr(obj, meth)
744 
745  result[field_name] = _convert_value(field_value)
746 
747  return result
748 
749 
def _convert_value(value, aux = None):
    """Convert *value* into plain Python objects where a conversion rule exists.

    The decision is taken on the ``__cpp_name__`` attribute of the value's class.
    Values whose class has no ``__cpp_name__``, whose name does not match
    ``regex_cppname``, or for which no rule applies are returned unchanged.

    :param value: the object to convert
    :param aux: auxiliary store object, only forwarded to the xAOD extractors
                (CutBookkeeper, FileMetaData, TruthMetaData, TriggerMenu, TriggerMenuJson)
    :return: the converted object, or *value* itself
    """
    cl=value.__class__

    if hasattr(cl, '__cpp_name__'):
        result = regex_cppname.match(cl.__cpp_name__)
        if result:
            # class name without the template arguments
            cpp_type = result.group(1)
            # standard containers are converted element by element
            if cpp_type == 'vector' or cpp_type == 'std::vector':
                return [_convert_value(val) for val in value]
            elif cpp_type == 'set' or cpp_type == 'std::set':
                return {_convert_value(val) for val in value}
            elif cpp_type == 'pair' or cpp_type == 'std::pair':
                return _convert_value(value.first), _convert_value(value.second)

            # elif cpp_type == 'long':
            #     return int(value)

            elif cpp_type == 'string' or cpp_type == 'std::string':
                return str(value)

            elif cl.__cpp_name__ == "_Bit_reference":
                return bool(value)

            # special case which extracts data in a better format from IOVPayloadContainer_p1 class
            elif cl.__cpp_name__ == 'IOVMetaDataContainer_p1':
                return _extract_fields_iovmdc(value)

            elif cl.__cpp_name__ == 'IOVPayloadContainer_p1':
                # the module-level _gbl_mode selects the detailed (per IOV range) output
                global _gbl_mode
                if _gbl_mode == 'iov':
                    return _extract_iov_detailed(value)
                else:
                    return _extract_fields_iov( value, range(value.m_attrIndexes.size()) )

            # xAOD types: interface object plus (where needed) its auxiliary store
            elif cl.__cpp_name__ == 'xAOD::EventFormat_v1':
                return _extract_fields_ef(value)
            elif cl.__cpp_name__ == 'xAOD::CutBookkeeperContainer_v1':
                return _extract_fields_cbk(interface=value, aux=aux)
            elif cl.__cpp_name__ == 'xAOD::FileMetaData_v1':
                return _extract_fields_fmd(interface=value, aux=aux)
            elif cl.__cpp_name__ == 'DataVector<xAOD::TruthMetaData_v1>':
                return _extract_fields_tmd(interface=value, aux=aux)

            elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenu_v1>' :
                return _extract_fields_triggermenu(interface=value, aux=aux)

            elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenuJson_v1>' :
                return _extract_fields_triggermenujson(interface=value, aux=aux)

            elif (cl.__cpp_name__ == 'EventStreamInfo_p1' or
                  cl.__cpp_name__ == 'EventStreamInfo_p2' or
                  cl.__cpp_name__ == 'EventStreamInfo_p3'):
                return _extract_fields_esi(value)

            elif (cl.__cpp_name__ == 'EventType_p1' or
                  cl.__cpp_name__ == 'EventType_p3'):
                # generic field extraction followed by decoding of the bit mask and the user type string
                fields = _extract_fields(value)
                fields = _convert_event_type_bitmask(fields)
                fields = _convert_event_type_user_type(fields)
                return fields

            # any other class whose name matches regex_persistent_class: generic extraction of the m_* members
            elif regex_persistent_class.match(cl.__cpp_name__):
                return _extract_fields(value)

    return value
815 
816 
817 def _get_attribute_val(iov_container, attr_name, attr_idx):
818  type_idx = attr_idx.typeIndex()
819  obj_idx = attr_idx.objIndex()
820 
821  attr_value = None
822 
823  if type_idx == 0:
824  attr_value = bool(iov_container.m_bool[obj_idx])
825  elif type_idx == 1:
826  attr_value = int(iov_container.m_char[obj_idx])
827  elif type_idx == 2:
828  attr_value = int(iov_container.m_unsignedChar[obj_idx])
829  elif type_idx == 3:
830  attr_value = int(iov_container.m_short[obj_idx])
831  elif type_idx == 4:
832  attr_value = int(iov_container.m_unsignedShort[obj_idx])
833  elif type_idx == 5:
834  attr_value = int(iov_container.m_int[obj_idx])
835  elif type_idx == 6:
836  attr_value = int(iov_container.m_unsignedInt[obj_idx])
837  elif type_idx == 7:
838  attr_value = int(iov_container.m_long[obj_idx])
839  elif type_idx == 8:
840  attr_value = int(iov_container.m_unsignedLong[obj_idx])
841  elif type_idx == 9:
842  attr_value = int(iov_container.m_longLong[obj_idx])
843  elif type_idx == 10:
844  attr_value = int(iov_container.m_unsignedLongLong[obj_idx])
845  elif type_idx == 11:
846  attr_value = float(iov_container.m_float[obj_idx])
847  elif type_idx == 12:
848  attr_value = float(iov_container.m_double[obj_idx])
849  elif type_idx == 13:
850  # skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
851  pass
852  elif type_idx == 14:
853  attr_value = str(iov_container.m_string[obj_idx])
854  # Cleaning class name from value
855  if attr_value.startswith('IOVMetaDataContainer_p1_'):
856  attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
857  if attr_value.startswith('_'):
858  attr_value = attr_value.replace('_', '/')
859  # Now it is clean
860  elif type_idx == 15:
861  attr_value = int(iov_container.m_date[obj_idx])
862  elif type_idx == 16:
863  attr_value = int(iov_container.m_timeStamp[obj_idx])
864  else:
865  raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
866 
867  return attr_value
868 
869 
870 def _extract_fields_iov( iov_container, idx_range ):
871  result = {}
872 
873  for idx in idx_range:
874  attr_idx = iov_container.m_attrIndexes[idx]
875  name_idx = attr_idx.nameIndex()
876  attr_name = iov_container.m_attrName[name_idx]
877  attr_value = _get_attribute_val(iov_container, attr_name, attr_idx)
878 
879  if attr_name not in result:
880  result[attr_name] = [attr_value]
881  else:
882  result[attr_name].append(attr_value)
883 
884  max_element_count = 0
885  for content in result.values():
886  if len(content) > max_element_count:
887  max_element_count = len(content)
888 
889  if max_element_count <= 1:
890  for name, content in result.items():
891  if len(content) > 0:
892  result[name] = content[0]
893  else:
894  result[name] = None
895 
896  return result
897 
898 
899 def _extract_iov_detailed(iov_container):
900  def iovtostr(t):
901  # break iov time into high and low halves (run number usually in the higher half)
902  return "({h}:{l})".format(h=t>>32, l=t&(2^32-1))
903 
904  def extract_list_collection(iov_container, listCollection ):
905  result = {}
906  ln = 0
907  for list in listCollection.m_attrLists:
908  ln = ln + 1
909  lname = 'List {ln}: iov=[{s} ,{e}]; Channel#={ch}'.format(
910  ln=ln, s=iovtostr(list.m_range.m_start),
911  e=iovtostr(list.m_range.m_stop),
912  ch=list.m_channelNumber )
913  result[ lname ] = _extract_fields_iov( iov_container, range(list.m_firstIndex, list.m_lastIndex) )
914  return result
915 
916  result = {}
917  pn = 0
918  for listCollection in iov_container.m_payloadVec:
919  pn = pn + 1
920  pname = 'IOV range {n}: [{s}, {e}]'.format(n=pn, s=iovtostr(listCollection.m_start),
921  e=iovtostr(listCollection.m_stop))
922  result[ pname ] = extract_list_collection(iov_container, listCollection )
923  return result
924 
925 
def _extract_fields_iovmdc(value):
    """Convert an IOV metadata container through its ``m_payload`` member."""
    payload = value.m_payload
    return _convert_value(payload)
928 
929 
def _extract_fields_esi(value):
    """Turn an EventStreamInfo object into a dictionary.

    :param value: object providing ``m_eventTypes``, ``m_numberOfEvents``, ``m_runNumbers``,
                  ``m_lumiBlockNumbers``, ``m_processingTags`` and ``m_itemList``
    :return: dict with the keys 'eventTypes', 'numberOfEvents', 'runNumbers',
             'lumiBlockNumbers', 'processingTags' and 'itemList'; the item list
             holds (class name, key) pairs
    """
    result = {
        'eventTypes': [_convert_value(event_type) for event_type in value.m_eventTypes],
        'numberOfEvents': value.m_numberOfEvents,
        'runNumbers': list(value.m_runNumbers),
        'lumiBlockNumbers': list(value.m_lumiBlockNumbers),
        'processingTags': [str(tag) for tag in value.m_processingTags],
    }

    # Get the class name in the repository with CLID <clid>
    from CLIDComps.clidGenerator import clidGenerator
    cgen = clidGenerator("")
    result['itemList'] = [(cgen.getNameFromClid(clid), sgkey) for clid, sgkey in value.m_itemList]

    return result
950 
951 
953  result = {}
954 
955  for ef_element in value:
956  result[ef_element.first] = ef_element.second.className()
957 
958  return result
959 
960 
961 def _extract_fields_cbk(interface=None, aux=None):
962  """Extract CutBookkeeper content into dictionary
963 
964  This function takes the CutBookkeeperContainer_v1 and CutBookkeeperAuxContainer_v1 objects.
965  It makes sure the the interface object uses the auxiliary object as store.
966  Args:
967  interface (CutBookkeeperContainer_v1): the interface class
968  aux (CutBookkeeperAuxContainer_v1): auxiliary container object
969  Returns
970  dict: with the cycle number and last stream
971  """
972  if not interface or not aux:
973  return {}
974  interface.setStore(aux)
975 
976  max_cycle = -1
977  input_stream = ''
978 
979  for cbk in interface:
980  current_cycle = int(cbk.cycle())
981  if current_cycle > max_cycle:
982  max_cycle = current_cycle
983  input_stream = str(cbk.inputStream())
984 
985  result = {
986  'currentCutCycle': max_cycle,
987  'currentCutInputStream': input_stream,
988  }
989  return result
990 
991 
def _extract_fields_fmd(interface=None, aux=None):
    """Turn static FileMetaData content into dictionary

    The interface object is attached to the auxiliary object (``setStore``);
    afterwards the production release, data type, run numbers and lumi blocks
    are read through ``interface.value`` and converted to python objects.
    Args:
        interface (FileMetaData_v1): the interface class
        aux (FileMetaDataAuxInfo_v1): auxiliary container object
    Returns
        dict: 'productionRelease' and 'dataType' as str, 'runNumbers' and
              'lumiBlocks' as list; empty dict if either argument is missing
    """
    import ROOT
    if not (interface and aux):
        return {}
    interface.setStore(aux)

    string_type = ROOT.std.string
    uint_vector_type = ROOT.std.vector('unsigned int')
    holders = {
        "productionRelease": string_type(),
        "dataType": string_type(),
        "runNumbers": uint_vector_type(),
        "lumiBlocks": uint_vector_type(),
    }
    # Note: using this for dynamic attributes returns empty content
    for name, holder in holders.items():
        try:
            # prefer the attribute of the interface class with this name as selector
            interface.value(getattr(interface, name), holder)
        except AttributeError:
            interface.value(name, holder)

    # Now return python objects: strings first, then the vectors
    result = {}
    for name, holder in holders.items():
        if type(holder) is string_type:
            result[name] = str(holder)
    for name, holder in holders.items():
        if type(holder) is uint_vector_type:
            result[name] = list(holder)
    return result
1025 
1026 
def _extract_fields_tmd(interface=None, aux=None):
    """Extract TruthMetaData content into dictionary

    This function takes the TruthMetaDataContainer_v1 and TruthMetaDataAuxContainer_v1 objects.
    It makes sure the the interface object uses the auxiliary object as store.
    Args:
        interface (TruthMetaDataContainer_v1): the interface class
        aux (TruthMetaDataAuxContainer_v1): auxiliary container object
    Returns
        dict: 'mcChannelNumber' plus 'weightNames' (list) and 'lhefGenerator',
              'generators', 'evgenProcess', 'evgenTune', 'hardPDF', 'softPDF' (str);
              a variable that cannot be read (SG::ExcBadAuxVar) yields an empty
              list/string. Empty dict if either argument is missing or empty.
    """
    import ROOT
    BadAuxVarException = ROOT.SG.ExcBadAuxVar

    if not interface or not aux:
        return {}
    interface.setStore(aux)

    # (accessor name == result key, python conversion); calling the conversion
    # without argument gives the fallback used when the variable is missing
    optional_fields = (
        ('weightNames', list),
        ('lhefGenerator', str),
        ('generators', str),
        ('evgenProcess', str),
        ('evgenTune', str),
        ('hardPDF', str),
        ('softPDF', str),
    )

    # Only one element is expected; should there be several, the values of
    # the last one are the ones returned.
    result = {}
    for tmd in interface:
        result['mcChannelNumber'] = tmd.mcChannelNumber()

        for name, convert in optional_fields:
            try:
                result[name] = convert(getattr(tmd, name)())
            except BadAuxVarException:
                result[name] = convert()

    return result
1085 
1086 
1087 """ Note: Deprecated. Legacy support for Run 2 AODs produced in release 21 or in release 22 prior to April 2021
1088 """
1089 def _extract_fields_triggermenu(interface, aux):
1090  if aux is None:
1091  return {}
1092 
1093  L1Items = []
1094  HLTChains = []
1095 
1096  try:
1097  interface.setStore( aux )
1098  if interface.size() > 0:
1099  # We make the assumption that the first stored SMK is
1100  # representative of all events in the input collection.
1101  firstMenu = interface.at(0)
1102  L1Items = [ _convert_value(item) for item in firstMenu.itemNames() ]
1103  HLTChains = [ _convert_value(chain) for chain in firstMenu.chainNames() ]
1104  except Exception as err: # noqa: F841
1105  msg.warn('Problem reading xAOD::TriggerMenu:')
1106 
1107  result = {}
1108  result['L1Items'] = L1Items
1109  result['HLTChains'] = HLTChains
1110 
1111  return result
1112 
1114  result = {}
1115 
1116  try:
1117  interface.setStore( aux )
1118  if interface.size() > 0:
1119  # We make the assumption that the first stored SMK is
1120  # representative of all events in the input collection.
1121  firstMenu = interface.at(0)
1122  import json
1123  decoded = json.loads(firstMenu.payload())
1124  result['RAWTriggerMenuJson'] = firstMenu.payload()
1125  result['name'] = firstMenu.name()
1126  result['dbkey'] = firstMenu.key()
1127  if decoded['filetype'] == 'hltmenu':
1128  result['HLTChains'] = [ _convert_value(chain) for chain in decoded['chains'] ]
1129  elif decoded['filetype'] == 'l1menu':
1130  result['L1Items'] = [ _convert_value(item) for item in decoded['items'] ]
1131  elif decoded['filetype'] in ['bunchgroupset', 'hltprescale', 'l1prescale', 'hltmonitoringsummary']:
1132  return result
1133 
1134  else:
1135  msg.warn('Got an xAOD::TriggerMenuJson called {0} but only expecting hltmenu or l1menu'.format(decoded['filetype']))
1136  return {}
1137 
1138  except Exception as err: # noqa: F841
1139  msg.warn('Problem reading xAOD::TriggerMenuJson')
1140 
1141  return result
1142 
1144  if 'user_type' in value:
1145  items = value['user_type'].split('#')[3:]
1146  for i in range(0, len(items), 2):
1147  value[items[i]] = _convert_value(items[i+1])
1148  return value
1149 
1151 
1152  types = None
1153  for key in value:
1154  if key == 'bit_mask':
1155  val = value[key]
1156 
1157  bitmask_length = len(val)
1158 
1159  is_simulation = False
1160  is_testbeam = False
1161  is_calibration = False
1162 
1163  if bitmask_length > 0: # ROOT.EventType.IS_SIMULATION
1164  is_simulation = val[0]
1165 
1166  if bitmask_length > 1: # ROOT.EventType.IS_TESTBEAM
1167  is_testbeam = val[1]
1168 
1169  if bitmask_length > 2: # ROOT.EventType.IS_CALIBRATION:
1170  is_calibration = val[2]
1171 
1172  types = [
1173  'IS_SIMULATION' if is_simulation else 'IS_DATA',
1174  'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
1175  'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
1176  ]
1177 
1178  value['type'] = types
1179  return value
1180 
1181 
def make_lite(meta_dict):
    """Reduce the metadata of every file to the keys kept in 'lite' mode.

    Entries whose type (according to 'metadata_items') matches
    ``regexEventStreamInfo`` keep only ``lite_primary_keys_to_keep``;
    '/TagInfo' keeps only ``lite_TagInfo_keys_to_keep``.

    :param meta_dict: dict filename -> metadata dict, modified in place
    :return: the same dict
    """
    for file_content in meta_dict.values():
        for key in file_content:
            item_types = file_content['metadata_items']
            if key not in item_types or not regexEventStreamInfo.match(item_types[key]):
                continue
            stream_info = file_content[key]
            for unwanted in [name for name in stream_info if name not in lite_primary_keys_to_keep]:
                stream_info.pop(unwanted)

        if '/TagInfo' in file_content:
            tag_info = file_content['/TagInfo']
            for unwanted in [name for name in tag_info if name not in lite_TagInfo_keys_to_keep]:
                tag_info.pop(unwanted)
    return meta_dict
1197 
1198 
def make_peeker(meta_dict):
    """Reduce the metadata of every file to the keys kept in 'peeker' mode.

    Entries whose type (according to 'metadata_items') matches
    ``regexEventStreamInfo`` and the containers '/TagInfo',
    '/Simulation/Parameters', '/Digitization/Parameters', 'CutBookkeepers' and
    'TruthMetaData' are stripped of all keys not listed below.

    :param meta_dict: dict filename -> metadata dict, modified in place
    :return: the same dict
    """
    def prune(content, keys_to_keep):
        # iterate over a copy of the keys: the dict shrinks while we go
        for item in list(content):
            if item not in keys_to_keep:
                content.pop(item)

    event_stream_info_keys = [
        'lumiBlockNumbers',
        'runNumbers',
        'mc_event_number',
        'mc_channel_number',
        'eventTypes',
        'processingTags',
        'itemList',
    ]

    container_keys = {
        '/TagInfo': [
            'beam_energy',
            'beam_type',
            'GeoAtlas',
            'IOVDbGlobalTag',
            'AODFixVersion',
            'AMITag',
            'project_name',
            'triggerStreamOfFile',
            'AtlasRelease',
            'specialConfiguration',
            'mc_campaign',
            'hepmc_version',
            'generators',
            'data_year',
        ],
        '/Simulation/Parameters': [
            'G4Version',
            'TruthStrategy',
            'SimBarcodeOffset',
            'RegenerationIncrement',
            'TRTRangeCut',
            'SimulationFlavour',
            'Simulator',
            'PhysicsList',
            'SimulatedDetectors',
        ],
        '/Digitization/Parameters': [
            'numberOfCollisions',
            'intraTrainBunchSpacing',
            # two separate keys: without the comma the adjacent literals
            # would be concatenated into a single, never matching name
            'BeamIntensityPattern',
            'physicsList',
            'digiSteeringConf',
            'pileUp',
            'DigitizedDetectors',
        ],
        'CutBookkeepers': [
            'currentCutCycle',
            'currentCutInputStream',
        ],
        'TruthMetaData': [
            'mcChannelNumber',
            'weightNames',
        ],
    }

    for filename, file_content in meta_dict.items():
        for key in file_content:
            if key in meta_dict[filename]['metadata_items'] and regexEventStreamInfo.match(meta_dict[filename]['metadata_items'][key]):
                prune(meta_dict[filename][key], event_stream_info_keys)

        for container, keys_to_keep in container_keys.items():
            if container in file_content:
                prune(meta_dict[filename][container], keys_to_keep)

    return meta_dict
1286 
1287 
def promote_keys(meta_dict, mode):
    """Move selected metadata from nested containers to the top level of each file's dict.

    :param meta_dict: dict filename -> metadata dict, modified in place
    :param mode: reading mode; 'peeker' promotes additional keys from FileMetaData,
                 'lite' removes the FileMetaData entry after promotion
    :return: the same dict
    """
    for filename, file_content in meta_dict.items():
        md = meta_dict[filename]
        for key in file_content:
            # EventStreamInfo: promote its whole content, then stop looking further
            if key in md['metadata_items'] and regexEventStreamInfo.match(md['metadata_items'][key]):
                md.update(md[key])

                if 'eventTypes' in md and len(md['eventTypes']):
                    # only the first event type is considered
                    et = md['eventTypes'][0]
                    # falls back to the first run number when there is no mc_event_number
                    md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
                    if 'mc_channel_number' in et:
                        md['mc_channel_number'] = et.get('mc_channel_number', None)
                    md['eventTypes'] = et['type']

                    # For very old files
                    if 'GeoAtlas' in et:
                        md['GeoAtlas'] = et.get('GeoAtlas', None)
                    if 'IOVDbGlobalTag' in et:
                        md['IOVDbGlobalTag'] = et.get('IOVDbGlobalTag', None)

                if 'lumiBlockNumbers' in md[key]:
                    md['lumiBlockNumbers'] = md[key]['lumiBlockNumbers']

                if 'processingTags' in md[key]:
                    md['processingTags'] = md[key]['processingTags']

                # the dict was modified while iterating over it: leave the loop right away
                meta_dict[filename].pop(key)
                break

            # FileMetaData is used only when isGaudiEnv() is false
            if not isGaudiEnv() and key in md['metadata_items'] and 'FileMetaData' in key:
                if 'beamType' in md[key]:
                    md['beam_type'] = md[key]['beamType']

                if 'runNumbers' in md[key]:
                    md['runNumbers'] = md[key]['runNumbers']

                if 'mcProcID' in md[key]:
                    md['mc_channel_number'] = int(md[key]['mcProcID'])

                if 'mcCampaign' in md[key]:
                    md['mc_campaign'] = md[key]['mcCampaign']

                if 'dataYear' in md[key]:
                    md['data_year'] = int(md[key]['dataYear'])

                if 'lumiBlocks' in md[key]:
                    md['lumiBlockNumbers'] = md[key]['lumiBlocks']

                if mode == 'peeker' and 'amiTag' in md[key]:
                    md['AMITag'] = md[key]['amiTag']

                if 'beamEnergy' in md[key]:
                    md['beam_energy'] = int(md[key]['beamEnergy'])

                if 'geometryVersion' in md[key]:
                    md['GeoAtlas'] = md[key]['geometryVersion']

                # EventType checks
                md['eventTypes'] = []
                if mode == 'peeker' and 'simFlavour' in md[key]:
                    md['SimulationFlavour'] = md[key]['simFlavour']

                # simulation is recognised by 'FullG4' or 'ATLFAST' in the simulation flavour
                if 'simFlavour' in md[key] and ('FullG4' in md[key]['simFlavour'] or 'ATLFAST' in md[key]['simFlavour']):
                    md['eventTypes'].append('IS_SIMULATION')
                else:
                    md['eventTypes'].append('IS_DATA')

                if 'GeoAtlas' in md and 'ATLAS' in md['GeoAtlas']:
                    md['eventTypes'].append('IS_ATLAS')
                    # this is probably safe to assume for all files used in AnalysisBase
                    md['eventTypes'].append('IS_PHYSICS')
                else:
                    md['eventTypes'].append('IS_TESTBEAM')

                if 'dataType' in md[key]:
                    md['processingTags'] = [md[key]['dataType']]

                if mode == 'peeker':
                    if 'productionRelease' in md[key]:
                        md['AtlasRelease'] = md[key]['productionRelease']

                    if 'generatorsInfo' in md[key]:
                        md['generators'] = md[key]['generatorsInfo']

                if mode == 'lite':
                    meta_dict[filename].pop(key)
                # new keys were added to the dict being iterated: leave the loop
                break

        # the content of the following containers is merged into the top level
        # and the containers themselves are removed
        if '/TagInfo' in file_content:
            md.update(md['/TagInfo'])
            md.pop('/TagInfo')

        if '/Generation/Parameters' in file_content:
            md.update(md['/Generation/Parameters'])
            md.pop('/Generation/Parameters')

        if '/Simulation/Parameters' in file_content:
            md.update(md['/Simulation/Parameters'])
            md.pop('/Simulation/Parameters')

        if '/Digitization/Parameters' in file_content:
            md.update(md['/Digitization/Parameters'])
            md.pop('/Digitization/Parameters')

        if 'CutBookkeepers' in file_content:
            md.update(md['CutBookkeepers'])
            md.pop('CutBookkeepers')

    return meta_dict
1397 
1398 
def convert_itemList(metadata, layout):
    """
    This function will rearrange the itemList values to match the format of 'eventdata_items', 'eventdata_itemsList'
    or 'eventdata_itemsDic' generated with the legacy file peeker tool
    :param metadata: a dictionary obtained using read_metadata method.
                     The mode for read_metadata must be 'peeker' or 'full'
    :param layout: the mode in which the data will be converted:
                * for 'eventdata_items' use: layout= None
                * for 'eventdata_itemsList' use: layout= '#join'
                * for 'eventdata_itemsDic' use: layout= 'dict'
    :return: the item list in the requested layout; None when no item list is
             found or the layout is not one of the above
    """

    # Find the itemsList:
    item_list = None

    if 'itemList' in metadata:
        item_list = metadata['itemList']
    else:
        # look for the first entry holding an EventStreamInfo object, whatever
        # its persistent version (same test as in the rest of this module)
        for key in metadata:
            if ('metadata_items' in metadata
                    and key in metadata['metadata_items']
                    and regexEventStreamInfo.match(metadata['metadata_items'][key])):
                item_list = metadata[key]['itemList']
                break

    if item_list is None:
        return None

    if layout is None:
        return item_list

    if layout == '#join':
        # entries without a class name are left out
        return [k + '#' + v for k, v in item_list if k]

    if layout == 'dict':
        from collections import defaultdict
        dic = defaultdict(list)

        for k, v in item_list:
            dic[k].append(v)

        return dict(dic)

    return None
1444 
1445 
def dataheader_nentries(infile):
    """Extract number of entries from DataHeader.

    infile  ROOT TFile object or filename string
    return  Number of entries as returned by DataHeader object in infile,
            None in absence of DataHeader object
    raises  RuntimeError if a non-TTree DataHeader is found with a ROOT version below 6.31/01,
            NotImplementedError if the DataHeader is neither a TTree nor an RNTuple
    """
    import ROOT
    from PyUtils.PoolFile import PoolOpts

    root_file = infile if isinstance(infile, ROOT.TFile) else ROOT.TFile.Open(infile)

    # the DataHeader can be stored under its TTree name or its RNTuple name
    candidate_names = {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}
    for name in candidate_names:
        obj = root_file.Get(name)
        msg.debug(f"dataheader_nentries: {name=}, {obj=}, {type(obj)=}")
        if not obj:
            continue

        if isinstance(obj, ROOT.TTree):
            return obj.GetEntriesFast()

        # check early to avoid scary ROOT read errors
        if ROOT.gROOT.GetVersionInt() < 63100:
            raise RuntimeError("ROOT ver. 6.31/01 or greater needed to read RNTuple files")
        if not isinstance(obj, ROOT.Experimental.RNTuple):
            raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
        return ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()

    return None
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
python.MetaReader._extract_iov_detailed
def _extract_iov_detailed(iov_container)
Definition: MetaReader.py:899
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:52
python.MetaReader._extract_fields_tmd
def _extract_fields_tmd(interface=None, aux=None)
Definition: MetaReader.py:1027
python.MetaReader._extract_fields_esi
def _extract_fields_esi(value)
Definition: MetaReader.py:930
python.MetaReader._extract_fields_iovmdc
def _extract_fields_iovmdc(value)
Definition: MetaReader.py:926
python.MetaReader.make_peeker
def make_peeker(meta_dict)
Definition: MetaReader.py:1199
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
python.MetaReader._extract_fields
def _extract_fields(obj)
Definition: MetaReader.py:735
python.MetaReader._check_project
def _check_project()
Definition: MetaReader.py:682
python.MetaReader._get_pfn
def _get_pfn(filename)
Definition: MetaReader.py:691
python.MetaReader._extract_fields_iov
def _extract_fields_iov(iov_container, idx_range)
Definition: MetaReader.py:870
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
python.MetaReader._convert_event_type_bitmask
def _convert_event_type_bitmask(value)
Definition: MetaReader.py:1150
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
python.MetaReader.make_lite
def make_lite(meta_dict)
Definition: MetaReader.py:1182
python.AthConfigFlags.isGaudiEnv
def isGaudiEnv()
Definition: AthConfigFlags.py:14
python.MetaReader._convert_event_type_user_type
def _convert_event_type_user_type(value)
Definition: MetaReader.py:1143
python.MetaReader._extract_fields_triggermenu
def _extract_fields_triggermenu(interface, aux)
Definition: MetaReader.py:1089
beamspotman.dir
string dir
Definition: beamspotman.py:623
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:224
python.MetaReader._convert_value
def _convert_value(value, aux=None)
Definition: MetaReader.py:750
python.MetaReader._extract_fields_cbk
def _extract_fields_cbk(interface=None, aux=None)
Definition: MetaReader.py:961
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.MetaReader._read_guid
def _read_guid(filename)
Definition: MetaReader.py:709
TrigJetMonitorAlgorithm.items
items
Definition: TrigJetMonitorAlgorithm.py:79
python.MetaReader._extract_fields_triggermenujson
def _extract_fields_triggermenujson(interface, aux)
Definition: MetaReader.py:1113
python.MetaReader.promote_keys
def promote_keys(meta_dict, mode)
Definition: MetaReader.py:1288
Trk::open
@ open
Definition: BinningType.h:40
python.MetaReader._extract_fields_fmd
def _extract_fields_fmd(interface=None, aux=None)
Definition: MetaReader.py:992
dqt_zlumi_pandas.update
update
Definition: dqt_zlumi_pandas.py:42
python.MetaReader._extract_fields_ef
def _extract_fields_ef(value)
Definition: MetaReader.py:952
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
python.MetaReader.dataheader_nentries
def dataheader_nentries(infile)
Definition: MetaReader.py:1446
python.MetaReader.convert_itemList
def convert_itemList(metadata, layout)
Definition: MetaReader.py:1399
str
Definition: BTagTrackIpAccessor.cxx:11
python.MetaReader._get_attribute_val
def _get_attribute_val(iov_container, attr_name, attr_idx)
Definition: MetaReader.py:817
xAOD::bool
setBGCode setTAP setLVL2ErrorBits bool
Definition: TrigDecision_v1.cxx:60
readCCLHist.float
float
Definition: readCCLHist.py:83
Trk::split
@ split
Definition: LayerMaterialProperties.h:38