ATLAS Offline Software
Loading...
Searching...
No Matches
MetaReader.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3import os
4import re
5from fnmatch import fnmatchcase
6from AthenaCommon.Logging import logging
7from AthenaConfiguration.AthConfigFlags import isGaudiEnv
8from PyUtils.PoolFile import isRNTuple
9from ROOT import gSystem
10from AthenaConfiguration.Enums import Project
11
12
# Module-wide logger used by the metadata-reading code in this file.
msg = logging.getLogger('MetaReader')

# Every pattern below is compiled exactly once, at import time.  The original
# note states this is done so that _convert_value() does not have to compile
# them itself; read_metadata() also uses them to classify the type name of each
# metadata branch / field.

# --- Athena persistent classes: bare class name, optionally with "_p<N>" -----
regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')
regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')

# --- xAOD classes: bare class name, optionally with "_v<N>" ------------------
regexXAODCutBookkeeperContainer = re.compile(r'^xAOD::CutBookkeeperContainer(_v\d+)?$')
regexXAODCutBookkeeperContainerAux = re.compile(r'^xAOD::CutBookkeeperAuxContainer(_v\d+)?$')
regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')
regexXAODFileMetaData = re.compile(r'^xAOD::FileMetaData(_v\d+)?$')
regexXAODFileMetaDataAux = re.compile(r'^xAOD::FileMetaDataAuxInfo(_v\d+)?$')
# Matches a *name* such as "FileMetaDataAuxDyn.<attr>" (the "xAOD::" prefix and
# the ".<attr>" tail are both optional).
regexXAODFileMetaDataAuxDyn = re.compile(r'^(xAOD::)?FileMetaData.*AuxDyn(\.[a-zA-Z0-9]+)?$')

# --- xAOD trigger menu containers --------------------------------------------
regexXAODTriggerMenu = re.compile(r'^DataVector<xAOD::TriggerMenu(_v\d+)?>$')  # Run 2
regexXAODTriggerMenuAux = re.compile(r'^xAOD::TriggerMenuAuxContainer(_v\d+)?$')  # Run 2
regexXAODTriggerMenuJson = re.compile(r'^DataVector<xAOD::TriggerMenuJson(_v\d+)?>$')  # Run 3
regexXAODTriggerMenuJsonAux = re.compile(r'^xAOD::TriggerMenuJsonAuxContainer(_v\d+)?$')  # Run 3

# --- xAOD truth metadata containers ------------------------------------------
regexXAODTruthMetaData = re.compile(r'^DataVector<xAOD::TruthMetaData(_v\d+)?>$')
regexXAODTruthMetaDataAux = re.compile(r'^xAOD::TruthMetaDataAuxContainer(_v\d+)?$')

# --- Generic helpers ---------------------------------------------------------
# Splits a C++ type name into group(1) = qualified name and group(2) = optional
# "<...>" template argument list.
regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
# A (possibly namespace-qualified) class name whose last component ends in
# "_p<N>" or "_v<N>".
regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
# A "<scheme>:..." name containing ".RAW." / ".DRAW." or ending in ".data";
# read_metadata() uses it to decide that a non-local file is bytestream.
regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')
# Leading "<scheme>:" of a URI; group(1) is the scheme.
regex_URI_scheme = re.compile(r'^([A-Za-z0-9\+\.\-]+)\:')
36
# Top-level metadata keys retained in the 'lite' output.
lite_primary_keys_to_keep = [
    'lumiBlockNumbers',
    'runNumbers',
    'mc_event_number',
    'mc_channel_number',
    'eventTypes',
    'processingTags',
    'itemList',
]

# "/TagInfo" entries retained in the 'lite' output.
lite_TagInfo_keys_to_keep = [
    'beam_energy',
    'beam_type',
    'GeoAtlas',
    'IOVDbGlobalTag',
    'AODFixVersion',
    'project_name',
    'mc_campaign',
    'keywords',
]

# Names of the metadata entries that carry trigger configuration.
trigger_keys = [
    'TriggerConfigInfo',
    # xAOD trigger menu containers, short (AOD-style) names
    'TriggerMenu',
    'TriggerMenuJson_BG',
    'TriggerMenuJson_HLT',
    'TriggerMenuJson_HLTMonitoring',
    'TriggerMenuJson_HLTPS',
    'TriggerMenuJson_L1',
    'TriggerMenuJson_L1PS',
    # HLT conditions folders
    '/TRIGGER/HLT/Groups',
    '/TRIGGER/HLT/HltConfigKeys',
    '/TRIGGER/HLT/Menu',
    '/TRIGGER/HLT/PrescaleKey',
    '/TRIGGER/HLT/Prescales',
    # LVL1 conditions folders
    '/TRIGGER/LVL1/ItemDef',
    '/TRIGGER/LVL1/Lvl1ConfigKey',
    '/TRIGGER/LVL1/Menu',
    '/TRIGGER/LVL1/Prescales',
    '/TRIGGER/LVL1/Thresholds',
    # xAOD trigger menu containers, type-qualified names
    # NOTE(review): read_metadata() looks up the legacy container as
    # 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu'; the next entry spells the
    # type as TriggerMenuJson_v1 -- confirm which spelling is intended.
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenu',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1',
    'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS',
]
53
# Maps the field name of a TriggerMenuJson auxiliary store (type-qualified or
# short form, both ending in "Aux:") to the type-qualified name of its
# interface container.
#
# Iteration order is significant: read_metadata() takes the *first* key whose
# value matches, so all type-qualified keys must come before the short ones,
# and within each group the parts keep the order listed here.
trigger_menu_json_map = {
    f"{aux_prefix}TriggerMenuJson_{menu_part}Aux:":
        f"DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_{menu_part}"
    for aux_prefix in ("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
    for menu_part in ("L1PS", "BG", "HLT", "HLTMonitoring", "HLTPS", "L1")
}
68
69
70def read_metadata(filenames, file_type = None, mode = 'lite', promote = None, meta_key_filter = None,
71 unique_tag_info_values = True, ignoreNonExistingLocalFiles=False):
72 """
73 This tool is independent of Athena framework and returns the metadata from a given file.
74 :param filenames: the input file from which metadata needs to be extracted.
75 :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files.
76 :param mode: if true, will return all metadata associated with the filename. By default, is false and this will
77 return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
78 :return: a dictionary of metadata for the given input file.
79 """
80
81 # make the mode available in the _convert methods
82 global _gbl_mode
83 _gbl_mode = mode
84
85 from RootUtils import PyROOTFixes # noqa F401
86
87 # Check if the input is a file or a list of files.
88 if isinstance(filenames, str):
89 filenames = [filenames]
90
91 # Check if file_type is an allowed value
92 if file_type is not None:
93 if file_type not in ('POOL', 'BS'):
94 raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
95 else:
96 msg.info('Forced file_type: {0}'.format(file_type))
97
98 # Check the value of mode parameter
99 if mode not in ('tiny', 'lite', 'full', 'peeker', 'iov'):
100 raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker", "iov" or "full"')
101
102 if meta_key_filter is None:
103 meta_key_filter = []
104
105 # Disable 'full' and 'iov' in non-Gaudi environments
106 if not isGaudiEnv():
107 if mode in ('full', 'iov'):
108 raise NameError('The following modes are not available in AnalysisBase: "iov" and "full"')
109
110 msg.info('Current mode used: {0}'.format(mode))
111 msg.info('Current filenames: {0}'.format(filenames))
112
113 if mode != 'full' and mode !='iov' and len(meta_key_filter) > 0:
114 raise NameError('It is possible to use the meta_key_filter option only for full mode')
115 if meta_key_filter:
116 msg.info('Filter used: {0}'.format(meta_key_filter))
117
118 # create the storage object for metadata.
119 meta_dict = {}
120
121 # ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
122 for filename in filenames:
123 meta_dict[filename] = {}
124 current_file_type = None
125 # Determine the file_type of the input and store this information into meta_dict
126 if not file_type:
127 if os.path.isfile(filename):
128
129 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # Attention, bizarre convention of return value!!
130 msg.warn('Ignoring not accessible file: {}'.format(filename))
131 continue
132
133 with open(filename, 'rb') as binary_file:
134 magic_file = binary_file.read(4)
135
136 if magic_file == 'root' or magic_file == b'root':
137 current_file_type = 'POOL'
138 meta_dict[filename]['file_type'] = 'POOL'
139
140 elif Project.determine() in (
141 Project.AnalysisBase, Project.AthAnalysis):
142 raise RuntimeError(
143 f"{filename} is not a ROOT file, assumed bytestream"
144 ", this is not supported in Analysis releases")
145 else:
146 current_file_type = 'BS'
147 meta_dict[filename]['file_type'] = 'BS'
148
149 # add information about the file_size of the input filename
150 meta_dict[filename]['file_size'] = os.path.getsize(filename)
151
152 # determine the file type for the remote input files
153 else:
154 if regex_BS_files.match(filename):
155 current_file_type = 'BS'
156 meta_dict[filename]['file_type'] = 'BS'
157 else:
158 current_file_type = 'POOL'
159 meta_dict[filename]['file_type'] = 'POOL'
160
161 # add information about the file_size of the input filename
162 meta_dict[filename]['file_size'] = None # None -> we can't read the file size for a remote file
163
164 else:
165 current_file_type = file_type
166
167 # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
168 if current_file_type == 'POOL':
169
170 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # Attention, bizarre convention of return value!!
171 msg.warn('Ignoring not accessible file: {}'.format(filename))
172 continue
173
174 import ROOT
175 # open the file using ROOT.TFile
176 current_file = ROOT.TFile.Open( _get_pfn(filename) )
177
178 # get auto flush setting from the main EventData TTree
179 from PyUtils.PoolFile import PoolOpts
180 collectionTree = current_file.Get(PoolOpts.TTreeNames.EventData)
181 if isinstance(collectionTree, ROOT.TTree):
182 meta_dict[filename]['auto_flush'] = collectionTree.GetAutoFlush()
183
184 # read and add the 'GUID' value
185 meta_dict[filename]['file_guid'] = _read_guid(filename)
186
187 # read and add compression level and algorithm
188 meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
189 meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()
190
191 if (
192 isRNTuple(md:=current_file.Get(PoolOpts.RNTupleNames.MetaData))
193 and mode != "tiny"
194 ):
195 msg.warning(
196 "Reading in-file metadata from RNTuple is currently of limited support"
197 )
198 meta_dict[filename]["metadata_items"] = {}
199
200 try:
201 from ROOT import RNTupleReader
202 except ImportError:
203 from ROOT.Experimental import RNTupleReader
204
205 reader = RNTupleReader.Open(md)
206 entry = reader.CreateEntry()
207 reader.LoadEntry(0, entry)
208 auxes = {}
209 classes_with_aux = {
210 "xAOD::FileMetaData_v1",
211 "xAOD::FileMetaDataAuxInfo_v1",
212 "xAOD::TriggerMenuJsonAuxContainer_v1",
213 "DataVector<xAOD::TriggerMenuJson_v1>",
214 "xAOD::TruthMetaDataAuxContainer_v1",
215 "DataVector<xAOD::TruthMetaData_v1>",
216 "xAOD::CutBookkeeperContainer_v1",
217 "xAOD::CutBookkeeperAuxContainer_v1",
218 "xAOD::LumiBlockRangeAuxContainer_v1",
219 "DataVector<xAOD::LumiBlockRange_v1>",
220 }
221
222 dynamic_fmd_items = {}
223
224 meta_filter = get_meta_filter(mode, meta_key_filter)
225
226 for field in reader.GetDescriptor().GetTopLevelFields():
227 normalizedName = field.GetFieldName()
228 if "index_ref" in normalizedName:
229 continue
230 if regexIOVMetaDataContainer.match(field.GetTypeName()):
231 # if field name is e.g. IOVMetaDataContainer_p1__Digitization_Parameters,
232 # strip the prefix and change underscore to slash to slash
233 normalizedName = (
234 field.GetFieldName()
235 .replace("IOVMetaDataContainer_p1_", "")
236 .replace("_", "/")
237 )
238 meta_dict[filename]["metadata_items"][normalizedName] = (
239 "IOVMetaDataContainer"
240 )
241 elif regexByteStreamMetadataContainer.match(field.GetTypeName()):
242 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
243 "ByteStreamMetadataContainer"
244 )
245 elif regexEventStreamInfo.match(field.GetTypeName()):
246 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
247 "EventStreamInfo"
248 )
249 elif regexXAODFileMetaData.match(field.GetTypeName()):
250 meta_dict[filename]["metadata_items"][
251 field.GetFieldName().replace("xAOD__", "xAOD::")
252 ] = field.GetTypeName()
253 elif regexXAODFileMetaDataAuxDyn.match(
254 normalizedName := field.GetFieldName()
255 .replace("xAOD__", "xAOD::")
256 .replace("AuxDyn:", "AuxDyn.")
257 ):
258 result = (
259 False
260 if entry[field.GetFieldName()] == "\x00"
261 else entry[field.GetFieldName()]
262 )
263 dynamic_fmd_items[normalizedName.split(".")[1]] = result
264 meta_dict[filename]["metadata_items"][normalizedName] = (
265 field.GetTypeName()
266 )
267 continue
268 elif regexXAODFileMetaDataAux.match(field.GetTypeName()):
269 meta_dict[filename]["metadata_items"][
270 field.GetFieldName()
271 .replace("xAOD__", "xAOD::")
272 .replace("Aux:", "Aux.")
273 ] = field.GetTypeName()
274 elif regexXAODTruthMetaData.match(field.GetTypeName()):
275 meta_dict[filename]["metadata_items"][
276 field.GetFieldName()
277 .replace("xAOD__", "xAOD::")
278 .replace("DataVector_", "DataVector<")
279 .replace("__Truth", ">_Truth")
280 ] = "TruthMetaData"
281 elif regexXAODTruthMetaDataAux.match(field.GetTypeName()):
282 meta_dict[filename]["metadata_items"][
283 field.GetFieldName()
284 .replace("xAOD__", "xAOD::")
285 .replace("Aux:", "Aux.")
286 ] = field.GetTypeName()
287 elif regexXAODEventFormat.match(field.GetTypeName()):
288 meta_dict[filename]["metadata_items"][
289 field.GetFieldName().replace("xAOD__", "xAOD::")
290 ] = field.GetTypeName()
291 elif regexXAODTriggerMenuJson.match(field.GetTypeName()):
292 meta_dict[filename]["metadata_items"][
293 field.GetFieldName()
294 .replace("xAOD__", "xAOD::")
295 .replace("DataVector_", "DataVector<")
296 .replace("__Trigger", ">_Trigger")
297 ] = field.GetTypeName()
298 elif regexXAODTriggerMenuJsonAux.match(field.GetTypeName()):
299 meta_dict[filename]["metadata_items"][
300 field.GetFieldName()
301 .replace("xAOD__", "xAOD::")
302 .replace("Aux:", "Aux.")
303 ] = field.GetTypeName()
304 elif regexXAODCutBookkeeperContainer.match(field.GetTypeName()):
305 meta_dict[filename]["metadata_items"][
306 field.GetFieldName()
307 .replace("xAOD__", "xAOD::")
308 .replace("DataVector_", "DataVector<")
309 .replace("__CutBookkeeper", ">_CutBookkeeper")
310 ] = field.GetTypeName()
311 elif regexXAODCutBookkeeperContainerAux.match(field.GetTypeName()):
312 meta_dict[filename]["metadata_items"][
313 field.GetFieldName()
314 .replace("xAOD__", "xAOD::")
315 .replace("Aux:", "Aux.")
316 ] = field.GetTypeName()
317 else:
318 meta_dict[filename]["metadata_items"][
319 field.GetFieldName().replace("Aux:", "Aux.")
320 ] = field.GetTypeName()
321
322 if field.GetTypeName() in classes_with_aux:
323 # handle aux classes later
324 auxes[field.GetFieldName()] = field.GetTypeName()
325 continue
326
327 if not should_keep_meta(
328 normalizedName, field.GetTypeName(), meta_filter
329 ):
330 continue
331
332 try:
333 meta_dict[filename][normalizedName] = _convert_value(
334 entry[field.GetFieldName()]
335 )
336 except KeyError:
337 msg.warning(f"missing type {field.GetTypeName()}")
338
339 meta_dict[filename]["metadata_items"] = denormalize_metadata_types(
340 meta_dict[filename]["metadata_items"]
341 )
342
343 def _get_aux_base(aux_key: str) -> str:
344 # Remove known prefixes
345 key = aux_key
346 key = key.replace("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
347 key = key.replace("xAOD__FileMetaDataAuxInfo_v1_", "")
348 key = key.replace("xAOD__TruthMetaDataAuxContainer_v1_", "")
349 # Remove known suffixes
350 if key.endswith("Aux:"):
351 key = key[:-4]
352 elif key.endswith("Aux"):
353 key = key[:-3]
354 # Remove any trailing ':' or '_'
355 key = key.strip("_:")
356 return key
357
358 def _get_main_base(main_key: str) -> str:
359 main_base = main_key
360 # For DataVectors
361 if main_key.startswith("DataVector_xAOD__TriggerMenuJson_v1__"):
362 main_base = main_key.replace(
363 "DataVector_xAOD__TriggerMenuJson_v1__", ""
364 )
365 # For FileMetaData
366 elif main_key.startswith("xAOD__FileMetaData_v1_"):
367 main_base = main_key.replace("xAOD__FileMetaData_v1_", "")
368 # For TruthMetaData
369 elif main_key.startswith("DataVector_xAOD__TruthMetaData_v1__"):
370 main_base = main_key.replace(
371 "DataVector_xAOD__TruthMetaData_v1__", ""
372 )
373 return main_base
374
def _find_associated_pairs(auxes: dict) -> list[tuple[str, str]]:
    """Match each auxiliary-store field with its interface-container field.

    Two lookup tables keyed by reduced container name are built from the keys
    of ``auxes``: one from the keys containing "Aux" (via _get_aux_base) and
    one from all keys (via _get_main_base, skipping empty results).  A pair is
    produced for every reduced name present in both tables.

    :param auxes: mapping whose keys are field names; the values are not used.
    :return: list of ``(aux_field_name, main_field_name)`` tuples, in the order
        the aux names were first encountered.
    """
    aux_by_base = {
        _get_aux_base(field_name): field_name
        for field_name in auxes
        if "Aux" in field_name
    }

    main_by_base = {}
    for field_name in auxes:
        reduced = _get_main_base(field_name)
        if reduced:
            main_by_base[reduced] = field_name

    return [
        (aux_field, main_by_base[reduced])
        for reduced, aux_field in aux_by_base.items()
        if reduced in main_by_base
    ]
394
395 for pair in _find_associated_pairs(auxes):
396 return_obj = _convert_value(
397 entry[pair[1]],
398 entry[pair[0]],
399 )
400 key = next(
401 (
402 k
403 for k, v in trigger_menu_json_map.items()
404 if v
405 == pair[1]
406 .replace("xAOD__", "xAOD::")
407 .replace("DataVector_", "DataVector<")
408 .replace("__Trigger", ">_Trigger")
409 ),
410 auxes[pair[0]],
411 )
412
413 try:
414 key = (
415 key.replace("xAOD__", "xAOD::")
416 if key.count("_") <= 1
417 else key.replace("xAOD__", "xAOD::").rsplit("_", 2)[0]
418 )
419 except IndexError:
420 pass
421
422 if not should_keep_meta(
423 pair[0]
424 .replace("xAOD__", "xAOD::")
425 .replace("DataVector_", "DataVector<")
426 .replace("__Trigger", ">_Trigger")
427 .replace("Aux:", "Aux."),
428 key,
429 meta_filter,
430 ):
431 continue
432
433 if "TriggerMenuJson" in pair[0]:
434 if "RAWTriggerMenuJson" in return_obj:
435 key = (
436 pair[1]
437 if pair[0].startswith("Trigger")
438 else trigger_menu_json_map[pair[0]]
439 )
440 meta_dict[filename][key] = return_obj["RAWTriggerMenuJson"]
441 del return_obj["RAWTriggerMenuJson"]
442 if "TriggerConfigInfo" not in meta_dict[filename]:
443 meta_dict[filename]["TriggerConfigInfo"] = {}
444 if "dbkey" in return_obj:
445 meta_dict[filename]["TriggerConfigInfo"][
446 pair[0].split("_")[-1].replace("Aux:", "")
447 ] = {"key": return_obj["dbkey"], "name": return_obj["name"]}
448 del return_obj["dbkey"]
449 del return_obj["name"]
450 if "TriggerMenu" not in meta_dict[filename]:
451 meta_dict[filename]["TriggerMenu"] = {}
452 meta_dict[filename]["TriggerMenu"].update(return_obj)
453 elif "FileMetaData" in pair[0]:
454 if "FileMetaData" not in meta_dict[filename]:
455 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
456 meta_dict[filename]["FileMetaData"].update(return_obj)
457 elif "TruthMetaData" in pair[0]:
458 if pair == ("TruthMetaDataAux:", "TruthMetaData"):
459 if "TruthMetaData" not in meta_dict[filename]:
460 meta_dict[filename]["TruthMetaData"] = {}
461 meta_dict[filename]["TruthMetaData"].update(return_obj)
462 else:
463 # for backward compatibility
464 meta_dict[filename][
465 pair[1]
466 .replace("xAOD__", "xAOD::")
467 .replace("DataVector_", "DataVector<")
468 .replace("__Truth", ">_Truth")
469 ] = {}
470 meta_dict[filename][
471 pair[0]
472 .replace("xAOD__", "xAOD::")
473 .replace("Aux:", "Aux.")
474 ] = {}
475 elif pair == ("CutBookkeepersAux:", "CutBookkeepers"):
476 meta_dict[filename]["CutBookkeepers"] = return_obj
477
478 msg.debug(f"Read metadata from RNTuple: {meta_dict[filename]}")
479
480 else:
481 # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
482 if mode != 'tiny':
483 # selecting from all tree the only one which contains metadata, respectively "MetaData"
484 metadata_tree = current_file.Get('MetaData')
485 # read all list of branches stored in "MetaData" tree
486 metadata_branches = metadata_tree.GetListOfBranches()
487 nr_of_branches = metadata_branches.GetEntriesFast()
488
489 # object to store the names of metadata containers and their corresponding class name.
490 meta_dict[filename]['metadata_items'] = {}
491
492 meta_filter = get_meta_filter(mode, meta_key_filter)
493
494 # store all persistent classes for metadata container existing in a POOL/ROOT file.
495 persistent_instances = {}
496 dynamic_fmd_items = {}
497
498 # Protect non-Gaudi environments from meta-data classes it doesn't know about
499 if not isGaudiEnv():
500 metadata_tree.SetBranchStatus("*", False)
501
502 for i in range(0, nr_of_branches):
503 branch = metadata_branches.At(i)
504 name = branch.GetName()
505 if name == 'index_ref':
506 # skip the index branch
507 continue
508
509 class_name = branch.GetClassName()
510
511 if regexIOVMetaDataContainer.match(class_name):
512 name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')
513
514 if regexIOVMetaDataContainer.match(class_name):
515 meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
516 elif regexByteStreamMetadataContainer.match(class_name):
517 meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
518 elif regexEventStreamInfo.match(class_name):
519 meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
520 elif regexXAODFileMetaData.match(class_name):
521 meta_dict[filename]['metadata_items'][name] = 'FileMetaData'
522 elif regexXAODTruthMetaData.match(class_name):
523 meta_dict[filename]['metadata_items'][name] = 'TruthMetaData'
524 else:
525 type_name = class_name
526 if not type_name:
527 try:
528 type_name = branch.GetListOfLeaves()[0].GetTypeName()
529 except IndexError:
530 pass
531 meta_dict[filename]['metadata_items'][name] = type_name
532
533 if len(meta_filter) > 0:
534 keep = False
535 for filter_key, filter_class in meta_filter.items():
536 if (filter_key.replace('/', '_') in name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
537 if 'CutBookkeepers' in filter_key:
538 keep = filter_key == name
539 if keep:
540 break
541 else:
542 keep = True
543 break
544
545 if not keep:
546 continue
547 else:
548 # CutBookkeepers should always be filtered:
549 if 'CutBookkeepers' in name and name not in ['CutBookkeepers', 'CutBookkeepersAux.']:
550 continue
551
552 if not isGaudiEnv():
553 metadata_tree.SetBranchStatus(f"{name}*", True)
554
555 # assign the corresponding persistent class based of the name of the metadata container
556 if regexEventStreamInfo.match(class_name):
557 if class_name.endswith('_p1'):
558 persistent_instances[name] = ROOT.EventStreamInfo_p1()
559 elif class_name.endswith('_p2'):
560 persistent_instances[name] = ROOT.EventStreamInfo_p2()
561 else:
562 persistent_instances[name] = ROOT.EventStreamInfo_p3()
563 elif regexIOVMetaDataContainer.match(class_name):
564 persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
565 elif regexXAODEventFormat.match(class_name):
566 persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
567 elif regexXAODTriggerMenu.match(class_name) and _check_project() not in ['AthGeneration']:
568 persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
569 elif regexXAODTriggerMenuAux.match(class_name) and _check_project() not in ['AthGeneration']:
570 persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()
571 elif regexXAODTriggerMenuJson.match(class_name) and _check_project() not in ['AthGeneration']:
572 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonContainer_v1()
573 elif regexXAODTriggerMenuJsonAux.match(class_name) and _check_project() not in ['AthGeneration']:
574 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonAuxContainer_v1()
575 elif regexXAODCutBookkeeperContainer.match(class_name):
576 persistent_instances[name] = ROOT.xAOD.CutBookkeeperContainer_v1()
577 elif regexXAODCutBookkeeperContainerAux.match(class_name):
578 persistent_instances[name] = ROOT.xAOD.CutBookkeeperAuxContainer_v1()
579 elif regexXAODFileMetaData.match(class_name):
580 persistent_instances[name] = ROOT.xAOD.FileMetaData_v1()
581 elif regexXAODFileMetaDataAux.match(class_name):
582 persistent_instances[name] = ROOT.xAOD.FileMetaDataAuxInfo_v1()
583 elif regexXAODTruthMetaData.match(class_name):
584 persistent_instances[name] = ROOT.xAOD.TruthMetaDataContainer_v1()
585 elif regexXAODTruthMetaDataAux.match(class_name):
586 persistent_instances[name] = ROOT.xAOD.TruthMetaDataAuxContainer_v1()
587
588 if name in persistent_instances:
589 branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
590
591 # This creates a dict to store the dynamic attributes of the xAOD::FileMetaData
592 dynamicFMD = regexXAODFileMetaDataAuxDyn.match(name)
593 if dynamicFMD:
594 dynamicName = dynamicFMD.group().split('.')[-1]
595 dynamicType = regex_cppname.match(class_name)
596 if dynamicType:
597 # this should be a string
598 dynamic_fmd_items[dynamicName] = ROOT.std.string()
599 branch.SetAddress(ROOT.AddressOf(dynamic_fmd_items[dynamicName]))
600 else:
601 dynamic_fmd_items[dynamicName] = None
602
603
604 metadata_tree.GetEntry(0)
605
606 # This loads the dynamic attributes of the xAOD::FileMetaData from the TTree
607 for key in dynamic_fmd_items:
608 if dynamic_fmd_items[key] is None:
609 try:
610 if key.startswith("is"):
611 # this is probably a boolean
612 dynamic_fmd_items[key] = getattr(metadata_tree, key) != '\x00'
613 else:
614 # this should be a float
615 dynamic_fmd_items[key] = getattr(metadata_tree, key)
616 except AttributeError:
617 # should not happen, but just ignore missing attributes
618 pass
619 else:
620 # convert ROOT.std.string objects to python equivalent
621 dynamic_fmd_items[key] = str(dynamic_fmd_items[key])
622
623 # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
624 if meta_key_filter:
625 meta_dict[filename] = {}
626
627 # read the metadata
628 for name, content in persistent_instances.items():
629 key = name
630 if hasattr(content, 'm_folderName'):
631 key = content.m_folderName
632
633 # Some transition AODs contain both the Run2 and Run3 metadata formats. We only wish to read the Run3 format if such a file is encountered.
634 has_r3_trig_meta = ('TriggerMenuJson_HLT' in persistent_instances or 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT' in persistent_instances)
635 aux = None
636 if key.startswith('TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (AOD)
637 aux = persistent_instances[key+'Aux.']
638 elif key.startswith('DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (ESD)
639 menuPart = key.split('_')[-1]
640 aux = persistent_instances['xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_'+menuPart+'Aux.']
641 elif key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # AOD case (legacy support, HLT and L1 menus)
642 aux = persistent_instances['TriggerMenuAux.']
643 elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # ESD case (legacy support, HLT and L1 menus)
644 aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
645 elif (key == 'CutBookkeepers'
646 and 'CutBookkeepersAux.' in persistent_instances):
647 aux = persistent_instances['CutBookkeepersAux.']
648 elif key == 'CutBookkeepersAux.':
649 continue # Extracted using the interface object
650 elif (key == 'FileMetaData'
651 and 'FileMetaDataAux.' in persistent_instances):
652 aux = persistent_instances['FileMetaDataAux.']
653 elif (key == 'xAOD::FileMetaData_v1_FileMetaData'
654 and 'xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.' in persistent_instances):
655 aux = persistent_instances['xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.']
656 elif (key == 'TruthMetaData'
657 and 'TruthMetaDataAux.' in persistent_instances):
658 aux = persistent_instances['TruthMetaDataAux.']
659 elif key == 'TruthMetaDataAux.':
660 continue # Extracted using the interface object
661 elif 'Menu' in key and key.endswith('Aux.'):
662 continue # Extracted using the interface object
663
664 return_obj = _convert_value(content, aux)
665
666 if 'TriggerMenuJson' in key or ('TriggerMenu' in key and not has_r3_trig_meta):
667 if 'RAWTriggerMenuJson' in return_obj:
668 meta_dict[filename][key] = return_obj['RAWTriggerMenuJson']
669 del return_obj['RAWTriggerMenuJson']
670 if 'TriggerConfigInfo' not in meta_dict[filename]:
671 meta_dict[filename]['TriggerConfigInfo'] = {}
672 if 'dbkey' in return_obj:
673 meta_dict[filename]['TriggerConfigInfo'][key.split('_')[-1]] = {
674 'key' : return_obj['dbkey'],
675 'name': return_obj['name']
676 }
677 del return_obj['dbkey']
678 del return_obj['name']
679 if 'TriggerMenu' not in meta_dict[filename]:
680 meta_dict[filename]['TriggerMenu'] = {}
681 meta_dict[filename]['TriggerMenu'].update(return_obj)
682 elif "FileMetaData" in key:
683 if "FileMetaData" not in meta_dict[filename]:
684 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
685 meta_dict[filename]["FileMetaData"].update(return_obj)
686 else:
687 meta_dict[filename][key] = return_obj
688
689 try:
690 # get the number of events from EventStreamInfo
691 esi_dict = next(key for key, value in meta_dict[filename].items()
692 if isinstance(value, dict) and "numberOfEvents" in value and
693 meta_dict[filename]["metadata_items"][key] == "EventStreamInfo")
694 msg.debug(f"{esi_dict=}")
695 meta_dict[filename]["nentries"] = meta_dict[filename][esi_dict]["numberOfEvents"]
696 except StopIteration as err:
697 msg.debug(f"Caught {err=}, {type(err)=}, falling back on opening the DataHeader"
698 " Container to read the number of entries")
699 meta_dict[filename]['nentries'] = dataheader_nentries(current_file)
700 msg.debug(f"{meta_dict[filename]['nentries']=}")
701
702 if unique_tag_info_values and mode=='iov':
703 unique_tag_info_values = False
704 msg.info('disabling "unique_tag_info_values" option for "iov" mode')
705
706 # This is a required workaround which will temporarily be fixing ATEAM-560 originated from ATEAM-531
707 # ATEAM-560: https://its.cern.ch/jira/browse/ATEAM-560
708 # ATEAM-531: https://its.cern.ch/jira/browse/ATEAM-531
709 # This changes will remove all duplicates values presented in some files due
710 # to the improper merging of two IOVMetaDataContainers.
711 if unique_tag_info_values:
712 msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
713 'This is a workaround to remove all duplicate values from "/TagInfo" key')
714 if '/TagInfo' in meta_dict[filename]:
715 for key, value in meta_dict[filename]['/TagInfo'].items():
716 if isinstance(value, list) and value:
717 if len(unique_values := set(value)) > 1:
718 msg.warn(
719 f"Found multiple values for {key}: {value}. "
720 "Looking for possible duplicates."
721 )
722 maybe_ok = False
723 if key == "AMITag":
724 # curate duplicates like: ['s3681_q453', 's3681_q453_'] or ["s3681_q453", "q453_s3681"]
725 unique_amitags = set()
726 for amitags in unique_values:
727 unique_amitags.add(
728 "_".join([tag for tag in amitags.split("_") if tag])
729 )
730 # Remove tags that are parents - are contained in other tags
731 # Such that ["s3681","s3681_d1485"] keeps only the latter
732 parent_tags = []
733 for atag in unique_amitags:
734 if any(atag+'_' in x for x in unique_amitags if x != atag):
735 parent_tags += [atag]
736 for atag in parent_tags:
737 # Do not remove the last tag!
738 if len(unique_amitags)>1:
739 msg.warn(f"Removing parent AMI tag {atag}")
740 unique_amitags.remove(atag)
741 if len(unique_amitags) == 1:
742 maybe_ok = True
743 # Make sure we keep the one we want to keep
744 value.insert(0,list(unique_amitags)[0])
745 elif key == "beam_energy":
746 # handle duplicates like: ['6500000', '6500000.0'] or [3, "3"]
747 unique_energies = set()
748 for energy in unique_values:
749 try:
750 energy = int(energy)
751 except ValueError:
752 try:
753 energy = float(energy)
754 except ValueError:
755 pass
756 unique_energies.add(energy)
757 if len(unique_energies) == 1:
758 maybe_ok = True
759 elif key in ["AtlasRelease", "IOVDbGlobalTag", "AODFixVersion"]:
760 maybe_ok = True
761 if maybe_ok:
762 msg.warn(
763 f"Multiple values for {key} may mean the same, or "
764 "the input file was produced in multi-step job. "
765 f"Ignoring all but the first entry: {key} = {value[0]}"
766 )
767 else:
768 raise ValueError(
769 f"{key} from /TagInfo contains more than 1 unique value: {value}"
770 )
771
772 meta_dict[filename]['/TagInfo'][key] = value[0]
773
774 if promote is None:
775 promote = mode == 'lite' or mode == 'peeker'
776
777 # Filter the data and create a prettier output for the 'lite' mode
778 if mode == 'lite':
779 meta_dict = make_lite(meta_dict)
780
781 if mode == 'peeker':
782 meta_dict = make_peeker(meta_dict)
783
784 if promote:
785 meta_dict = promote_keys(meta_dict, mode)
786
787 # If AnalysisBase the itemList must be grabbed another way
788 if not isGaudiEnv():
789 if isinstance(collectionTree, ROOT.TTree):
790 meta_dict[filename]['itemList'] = [ (b.GetClassName(), b.GetName()) for b in collectionTree.GetListOfBranches() ]
791
792 # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
793 elif current_file_type == 'BS':
794
795 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and not os.path.isfile(filename):
796 msg.warn('Ignoring not accessible file: {}'.format(filename))
797 continue
798
799 import eformat
800
801 # store the number of entries
802 bs = eformat.istream(filename)
803 meta_dict[filename]['nentries'] = bs.total_events
804
805 # store the 'guid' value
806 data_reader = eformat.EventStorage.pickDataReader(filename)
807 assert data_reader, 'problem picking a data reader for file [%s]' % filename
808
809 # set auto flush equivalent, which for BS is always 1
810 meta_dict[filename]['auto_flush'] = 1
811
812 if hasattr(data_reader, 'GUID'):
813 meta_dict[filename]['file_guid'] = data_reader.GUID()
814
815 # compression level and algorithm, for BS always ZLIB
816 meta_dict[filename]['file_comp_alg'] = 1
817 meta_dict[filename]['file_comp_level'] = 1
818
819
820 # if the flag full is set to true then grab all metadata
821 # ------------------------------------------------------------------------------------------------------#
822 if mode != "tiny":
823 bs_metadata = {}
824
825 for md in data_reader.freeMetaDataStrings():
826 if md.startswith('Event type:'):
827 k = 'eventTypes'
828 v = []
829 if 'is sim' in md:
830 v.append('IS_SIMULATION')
831 else:
832 v.append('IS_DATA')
833
834 if 'is atlas' in md:
835 v.append('IS_ATLAS')
836 else:
837 v.append('IS_TESTBEAM')
838
839 if 'is physics' in md:
840 v.append('IS_PHYSICS')
841 else:
842 v.append('IS_CALIBRATION')
843
844 bs_metadata[k] = tuple(v)
845
846 elif md.startswith('GeoAtlas:'):
847 k = 'geometry'
848 v = md.split('GeoAtlas:')[1].strip()
849 bs_metadata[k] = v
850
851 elif md.startswith('IOVDbGlobalTag:'):
852 k = 'conditions_tag'
853 v = md.split('IOVDbGlobalTag:')[1].strip()
854 bs_metadata[k] = v
855
856 elif '=' in md:
857 k, v = md.split('=', 1) # Split on first '=' only
858 bs_metadata[k] = v
859
860 bs_metadata['detectorMask'] = data_reader.detectorMask()
861 bs_metadata['runNumbers'] = data_reader.runNumber()
862 bs_metadata['lumiBlockNumbers'] = data_reader.lumiblockNumber()
863 bs_metadata['projectTag'] = data_reader.projectTag()
864 bs_metadata['stream'] = data_reader.stream()
865 #bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
866 beamTypeNbr= data_reader.beamType()
867 #According to info from Rainer and Guiseppe the beam type is
868 #O: no beam
869 #1: protons
870 #2: ions
871 if (beamTypeNbr==0): bs_metadata['beamType'] = 'cosmics'
872 elif (beamTypeNbr==1 or beamTypeNbr==2): bs_metadata['beamType'] = 'collisions'
873 else: bs_metadata['beamType'] = 'unknown'
874
875 bs_metadata['beamEnergy'] = data_reader.beamEnergy()
876
877 meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
878 meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
879 meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
880 meta_dict[filename]['project_name'] = bs_metadata.get('projectTag', None)
881
882 # Promote up one level
883 meta_dict[filename]['detectorMask'] = [bs_metadata.get('detectorMask', None)]
884 meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
885 meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
886 meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
887 meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
888 meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
889
890 if not data_reader.good():
891 # event-less file...
892 meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
893 meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
894
895 msg.debug(f"{meta_dict[filename]=}")
896 msg.debug(f"{len(bs)=}")
897 if len(bs):
898 evt = bs[0]
899 try:
900 evt.check()
901 meta_dict[filename]['processingTags'] = [tag.name for tag in evt.stream_tag()]
902 meta_dict[filename]['evt_number'] = [evt.global_id()]
903 meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
904 # ATLASRECTS-7126: If there is no valid lumiblock information
905 # in the ByteStream header, get the info from the first event.
906 if meta_dict[filename]['lumiBlockNumbers'] == [0]:
907 msg.debug('Taking the luminosity block info from the first event (%i)', evt.lumi_block())
908 meta_dict[filename]['lumiBlockNumbers'] = [evt.lumi_block()]
909 # ATLASRECTS-7126: If there is no valid run number information
910 # in the ByteStream header, get the info from the first event.
911 if meta_dict[filename]['runNumbers'] == [0]:
912 msg.debug('Taking the run number info from the first event (%i)', evt.run_no())
913 meta_dict[filename]['runNumbers'] = [evt.run_no()]
914 except RuntimeError as err:
915 msg.error("Issue while reading the first event of BS file %r: %r", filename, err)
916 else:
917 msg.debug(f"{meta_dict[filename]=}")
918 else:
919 msg.warn(f"Event-less BS {filename=}, will not read metadata information from the first event")
920
921 # fix for ATEAM-122
922 if len(bs_metadata.get('eventTypes', '')) == 0: # see: ATMETADATA-6
923 evt_type = ['IS_DATA', 'IS_ATLAS']
924 if bs_metadata.get('stream', '').startswith('physics_'):
925 evt_type.append('IS_PHYSICS')
926 elif bs_metadata.get('stream', '').startswith('calibration_'):
927 evt_type.append('IS_CALIBRATION')
928 elif bs_metadata.get('projectTag', '').endswith('_calib'):
929 evt_type.append('IS_CALIBRATION')
930 else:
931 evt_type.append('Unknown')
932
933 meta_dict[filename]['eventTypes'] = evt_type
934
935 if mode == 'full':
936 meta_dict[filename]['bs_metadata'] = bs_metadata
937
938 # ------ Throw an error if the user provide other file types -------------------------------------------------#
939 else:
940 msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
941 return None
942
943 return meta_dict
944
945
947 import os
948 if 'AthSimulation_DIR' in os.environ:
949 return 'AthSimulation'
950 if 'AthGeneration_DIR' in os.environ:
951 return 'AthGeneration'
952 return 'Athena'
953
954
955def _get_pfn(filename):
956 """
957 Extract the actual filename if LFN or PFN notation is used
958 """
959 pfx = filename[0:4]
960 if pfx == 'PFN:':
961 return filename[4:]
962 if pfx == 'LFN:':
963 import subprocess, os
964 os.environ['POOL_OUTMSG_LEVEL'] = 'Error'
965 output = subprocess.check_output(['FClistPFN','-l',filename[4:]],text=True).split('\n')
966 if len(output) == 2:
967 return output[0]
968 msg.error( 'FClistPFN({0}) returned unexpected number of lines:'.format(filename) )
969 msg.error( '\n'.join(output) )
970 return filename
971
972
def _read_guid(filename):
    """
    Extract the GUID (Globally Unique Identifier, used in POOL files and Grid catalogs) from a POOL file.

    The value is the last ``FID`` entry found in the ``##Params`` object of the
    file, which is read as a TTree, an RNTuple or a TDirectory.

    :param filename: the input file; PFN:/LFN: notation is resolved with _get_pfn
    :return: the guid value, None if unavailable
    :raises NotImplementedError: if ``##Params`` is of a type that cannot be read here
    """
    import ROOT
    root_file = ROOT.TFile.Open( _get_pfn(filename) )
    if not root_file:
        # A file that could not be opened has no GUID to offer; do not
        # dereference the null handle.
        msg.error(f"Cannot open {filename} to read its GUID")
        return None
    params = root_file.Get('##Params')
    try:
        from ROOT import RNTuple as rnt
    except ImportError:
        from ROOT.Experimental import RNTuple as rnt
    if not params:
        return None
    if not isinstance(params, (ROOT.TTree, rnt, ROOT.TDirectory)):
        raise NotImplementedError(f"Cannot extract GUID from object {params!r} of type {type(params)!r}")

    # each parameter is stored as a "[NAME=...][VALUE=...]" string
    regex = re.compile(r'\[NAME=(\w+)\]\[VALUE=(.*)\]', re.ASCII)
    fid = None

    if isinstance(params, ROOT.TTree):
        for entry in params:
            param = entry.GetLeaf('db_string').GetValueString()
            result = regex.match(param)
            if result and result.group(1) == 'FID':
                # don't exit yet, it's the last FID entry that counts
                fid = result.group(2)
    elif isinstance(params, rnt):
        try:
            from ROOT import RNTupleReader
        except ImportError:
            from ROOT.Experimental import RNTupleReader
        reader = RNTupleReader.Open(params)
        try:
            entry = reader.CreateEntry()
        except AttributeError:
            entry = reader.GetModel().CreateEntry()
        for idx in range(reader.GetNEntries()):
            reader.LoadEntry(idx, entry)
            try:
                result = regex.match(str(entry['db_string']))
            except (AttributeError, TypeError) as err:
                # Early RNTuple implementation doesn't allow reading
                # strings on the python side, might be triggering it...
                msg.error(f"Cannot read FID from ##Params in RNTuple w/ ROOT error: {err}")
                return None
            if result and result.group(1) == 'FID':
                # don't exit yet, it's the last FID entry that counts
                fid = result.group(2)
    elif isinstance(params, ROOT.TDirectory):
        for key in params.GetListOfKeys():
            param = params.Get(key.GetName())
            result = regex.match(str(param))
            if result and result.group(1) == 'FID':
                # don't exit yet, it's the last FID entry that counts
                fid = result.group(2)

    return fid
1032
1033
1035 result = {}
1036
1037 for meth in dir(obj):
1038 if not meth.startswith('_'):
1039 if meth.startswith('m_'):
1040
1041 field_name = str(meth)[2:]
1042 field_value = getattr(obj, meth)
1043
1044 result[field_name] = _convert_value(field_value)
1045
1046 return result
1047
1048
1049def _convert_value(value, aux = None):
1050 cl=value.__class__
1051 if hasattr(cl, '__cpp_name__'):
1052 result = regex_cppname.match(cl.__cpp_name__)
1053 if result:
1054 cpp_type = result.group(1)
1055 if cpp_type == 'vector' or cpp_type == 'std::vector':
1056 return [_convert_value(val) for val in value]
1057 elif cpp_type == 'set' or cpp_type == 'std::set':
1058 return {_convert_value(val) for val in value}
1059 elif cpp_type == 'pair' or cpp_type == 'std::pair':
1060 return _convert_value(value.first), _convert_value(value.second)
1061
1062 # elif cpp_type == 'long':
1063 # return int(value)
1064
1065 elif cpp_type == 'string' or cpp_type == 'std::string':
1066 return str(value)
1067
1068 elif cl.__cpp_name__ == "_Bit_reference":
1069 return bool(value)
1070
1071 # special case which extracts data in a better format from IOVPayloadContainer_p1 class
1072 elif cl.__cpp_name__ == 'IOVMetaDataContainer_p1':
1073 return _extract_fields_iovmdc(value)
1074
1075 elif cl.__cpp_name__ == 'IOVPayloadContainer_p1':
1076 if _gbl_mode == 'iov':
1077 return _extract_iov_detailed(value)
1078 else:
1079 return _extract_fields_iov( value, range(value.m_attrIndexes.size()) )
1080
1081 elif cl.__cpp_name__ == 'xAOD::EventFormat_v1':
1082 return _extract_fields_ef(value)
1083 elif cl.__cpp_name__ == 'xAOD::CutBookkeeperContainer_v1':
1084 return _extract_fields_cbk(interface=value, aux=aux)
1085 elif cl.__cpp_name__ == 'xAOD::FileMetaData_v1':
1086 return _extract_fields_fmd(interface=value, aux=aux)
1087 elif cl.__cpp_name__ == 'DataVector<xAOD::TruthMetaData_v1>':
1088 return _extract_fields_tmd(interface=value, aux=aux)
1089
1090 elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenu_v1>' :
1091 return _extract_fields_triggermenu(interface=value, aux=aux)
1092
1093 elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenuJson_v1>' :
1094 return _extract_fields_triggermenujson(interface=value, aux=aux)
1095
1096 elif (cl.__cpp_name__ == 'EventStreamInfo_p1' or
1097 cl.__cpp_name__ == 'EventStreamInfo_p2' or
1098 cl.__cpp_name__ == 'EventStreamInfo_p3'):
1099 return _extract_fields_esi(value)
1100
1101 elif (cl.__cpp_name__ == 'EventType_p1' or
1102 cl.__cpp_name__ == 'EventType_p3'):
1103 fields = _extract_fields(value)
1104 fields = _convert_event_type_bitmask(fields)
1105 fields = _convert_event_type_user_type(fields)
1106 return fields
1107
1108 elif regex_persistent_class.match(cl.__cpp_name__):
1109 return _extract_fields(value)
1110
1111 return value
1112
1113
1114def _get_attribute_val(iov_container, attr_name, attr_idx):
1115 type_idx = attr_idx.typeIndex()
1116 obj_idx = attr_idx.objIndex()
1117
1118 attr_value = None
1119
1120 if type_idx == 0:
1121 attr_value = bool(iov_container.m_bool[obj_idx])
1122 elif type_idx == 1:
1123 attr_value = int(iov_container.m_char[obj_idx])
1124 elif type_idx == 2:
1125 attr_value = int(iov_container.m_unsignedChar[obj_idx])
1126 elif type_idx == 3:
1127 attr_value = int(iov_container.m_short[obj_idx])
1128 elif type_idx == 4:
1129 attr_value = int(iov_container.m_unsignedShort[obj_idx])
1130 elif type_idx == 5:
1131 attr_value = int(iov_container.m_int[obj_idx])
1132 elif type_idx == 6:
1133 attr_value = int(iov_container.m_unsignedInt[obj_idx])
1134 elif type_idx == 7:
1135 attr_value = int(iov_container.m_long[obj_idx])
1136 elif type_idx == 8:
1137 attr_value = int(iov_container.m_unsignedLong[obj_idx])
1138 elif type_idx == 9:
1139 attr_value = int(iov_container.m_longLong[obj_idx])
1140 elif type_idx == 10:
1141 attr_value = int(iov_container.m_unsignedLongLong[obj_idx])
1142 elif type_idx == 11:
1143 attr_value = float(iov_container.m_float[obj_idx])
1144 elif type_idx == 12:
1145 attr_value = float(iov_container.m_double[obj_idx])
1146 elif type_idx == 13:
1147 # skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
1148 pass
1149 elif type_idx == 14:
1150 attr_value = str(iov_container.m_string[obj_idx])
1151 # Cleaning class name from value
1152 if attr_value.startswith('IOVMetaDataContainer_p1_'):
1153 attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
1154 if attr_value.startswith('_'):
1155 attr_value = attr_value.replace('_', '/')
1156 # Now it is clean
1157 elif type_idx == 15:
1158 attr_value = int(iov_container.m_date[obj_idx])
1159 elif type_idx == 16:
1160 attr_value = int(iov_container.m_timeStamp[obj_idx])
1161 else:
1162 raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
1163
1164 return attr_value
1165
1166
1167def _extract_fields_iov( iov_container, idx_range ):
1168 result = {}
1169
1170 for idx in idx_range:
1171 attr_idx = iov_container.m_attrIndexes[idx]
1172 name_idx = attr_idx.nameIndex()
1173 attr_name = iov_container.m_attrName[name_idx]
1174 attr_value = _get_attribute_val(iov_container, attr_name, attr_idx)
1175
1176 if attr_name not in result:
1177 result[attr_name] = [attr_value]
1178 else:
1179 result[attr_name].append(attr_value)
1180
1181 max_element_count = 0
1182 for content in result.values():
1183 if len(content) > max_element_count:
1184 max_element_count = len(content)
1185
1186 if max_element_count <= 1:
1187 for name, content in result.items():
1188 if len(content) > 0:
1189 result[name] = content[0]
1190 else:
1191 result[name] = None
1192
1193 return result
1194
1195
1196def _extract_iov_detailed(iov_container):
1197 def iovtostr(t):
1198 # break iov time into high and low halves (run number usually in the higher half)
1199 return "({h}:{l})".format(h=t>>32, l=t&(2^32-1))
1200
1201 def extract_list_collection(iov_container, listCollection ):
1202 result = {}
1203 ln = 0
1204 for list in listCollection.m_attrLists:
1205 ln = ln + 1
1206 lname = 'List {ln}: iov=[{s} ,{e}]; Channel#={ch}'.format(
1207 ln=ln, s=iovtostr(list.m_range.m_start),
1208 e=iovtostr(list.m_range.m_stop),
1209 ch=list.m_channelNumber )
1210 result[ lname ] = _extract_fields_iov( iov_container, range(list.m_firstIndex, list.m_lastIndex) )
1211 return result
1212
1213 result = {}
1214 pn = 0
1215 for listCollection in iov_container.m_payloadVec:
1216 pn = pn + 1
1217 pname = 'IOV range {n}: [{s}, {e}]'.format(n=pn, s=iovtostr(listCollection.m_start),
1218 e=iovtostr(listCollection.m_stop))
1219 result[ pname ] = extract_list_collection(iov_container, listCollection )
1220 return result
1221
1222
def _extract_fields_iovmdc(value):
    """Convert an IOV metadata container by converting its ``m_payload`` member."""
    return _convert_value(value.m_payload)
1225
1226
def _extract_fields_esi(value):
    """Turn an EventStreamInfo object into a dictionary.

    :param value: object providing ``m_eventTypes``, ``m_numberOfEvents``,
                  ``m_runNumbers``, ``m_lumiBlockNumbers``,
                  ``m_processingTags`` and ``m_itemList``
    :return: dict with the keys ``eventTypes``, ``numberOfEvents``,
             ``runNumbers``, ``lumiBlockNumbers``, ``processingTags`` and
             ``itemList``; the latter is a list of (class name, key) tuples
    """
    result = {
        'eventTypes': [_convert_value(event_type) for event_type in value.m_eventTypes],
        'numberOfEvents': value.m_numberOfEvents,
        'runNumbers': list(value.m_runNumbers),
        'lumiBlockNumbers': list(value.m_lumiBlockNumbers),
        'processingTags': [str(tag) for tag in value.m_processingTags],
        'itemList': [],
    }

    # Get the class name in the repository with CLID <clid>
    from CLIDComps.clidGenerator import clidGenerator
    cgen = clidGenerator("")
    for clid, sgkey in value.m_itemList:
        # keys may come back as bytes; store them as text either way
        sgkey = sgkey.decode() if isinstance(sgkey, bytes) else str(sgkey)
        result['itemList'].append((cgen.getNameFromClid(clid), sgkey))

    return result
1251
1252
1254 result = {}
1255
1256 for ef_element in value:
1257 result[ef_element.first] = ef_element.second.className()
1258
1259 return result
1260
1261
1262def _extract_fields_cbk(interface=None, aux=None):
1263 """Extract CutBookkeeper content into dictionary
1264
1265 This function takes the CutBookkeeperContainer_v1 and CutBookkeeperAuxContainer_v1 objects.
1266 It makes sure the the interface object uses the auxiliary object as store.
1267 Args:
1268 interface (CutBookkeeperContainer_v1): the interface class
1269 aux (CutBookkeeperAuxContainer_v1): auxiliary container object
1270 Returns
1271 dict: with the cycle number and last stream
1272 """
1273 if not interface or not aux:
1274 return {}
1275 interface.setStore(aux)
1276
1277 max_cycle = -1
1278 input_stream = ''
1279
1280 for cbk in interface:
1281 current_cycle = int(cbk.cycle())
1282 if current_cycle > max_cycle:
1283 max_cycle = current_cycle
1284 input_stream = str(cbk.inputStream())
1285
1286 result = {
1287 'currentCutCycle': max_cycle,
1288 'currentCutInputStream': input_stream,
1289 }
1290 return result
1291
1292
def _extract_fields_fmd(interface=None, aux=None):
    """Read the fixed FileMetaData fields into a dictionary.

    The interface object is attached to the auxiliary store and asked to fill
    one ROOT holder object per field: two strings (productionRelease,
    dataType) and two vectors of unsigned int (runNumbers, lumiBlocks).

    Args:
        interface (FileMetaData_v1): the interface class
        aux (FileMetaDataAuxInfo_v1): auxiliary container object
    Returns
        dict: the four fields as Python ``str`` / ``list`` values; empty when
        either argument is missing or falsy
    """
    import ROOT
    if not (interface and aux):
        return {}
    interface.setStore(aux)

    uint_vector = ROOT.std.vector('unsigned int')
    holders = {
        "productionRelease": ROOT.std.string(),
        "dataType": ROOT.std.string(),
        "runNumbers": uint_vector(),
        "lumiBlocks": uint_vector(),
    }

    # Note: using this for dynamic attributes returns empty content
    for name, holder in holders.items():
        try:
            # prefer the attribute of the same name on the interface as selector
            selector = getattr(interface, name)
            interface.value(selector, holder)
        except AttributeError:
            # otherwise address the field by its name
            interface.value(name, holder)

    # Now return python objects: strings first, then the vectors
    result = {}
    for name, holder in holders.items():
        if type(holder) is ROOT.std.string:
            result[name] = str(holder)
    for name, holder in holders.items():
        if type(holder) is ROOT.std.vector('unsigned int'):
            result[name] = list(holder)
    return result
1326
1327
1328def _extract_fields_tmd(interface=None, aux=None):
1329 import ROOT
1330 BadAuxVarException = ROOT.SG.ExcBadAuxVar
1331 """Extract TruthMetaData content into dictionary
1332
1333 This function takes the TruthMetaDataContainer_v1 and TruthMetaDataAuxContainer_v1 objects.
1334 It makes sure the the interface object uses the auxiliary object as store.
1335 Args:
1336 interface (TruthMetaDataContainer_v1): the interface class
1337 aux (TruthMetaDataAuxContainer_v1): auxiliary container object
1338 Returns
1339 dict
1340 """
1341 if not interface or not aux:
1342 return {}
1343 interface.setStore(aux)
1344
1345 # return the first as we do not really expect more than one
1346 result = {}
1347 for tmd in interface:
1348 result['mcChannelNumber'] = tmd.mcChannelNumber()
1349
1350 try:
1351 result['weightNames'] = [str(v) for v in tmd.weightNames()]
1352 except BadAuxVarException:
1353 result['weightNames'] = []
1354
1355 try:
1356 result['lhefGenerator'] = str(tmd.lhefGenerator())
1357 except BadAuxVarException:
1358 result['lhefGenerator'] = ''
1359
1360 try:
1361 result['generators'] = str(tmd.generators())
1362 except BadAuxVarException:
1363 result['generators'] = ''
1364
1365 try:
1366 result['evgenProcess'] = str(tmd.evgenProcess())
1367 except BadAuxVarException:
1368 result['evgenProcess'] = ''
1369
1370 try:
1371 result['evgenTune'] = str(tmd.evgenTune())
1372 except BadAuxVarException:
1373 result['evgenTune'] = ''
1374
1375 try:
1376 result['hardPDF'] = str(tmd.hardPDF())
1377 except BadAuxVarException:
1378 result['hardPDF'] = ''
1379
1380 try:
1381 result['softPDF'] = str(tmd.softPDF())
1382 except BadAuxVarException:
1383 result['softPDF'] = ''
1384
1385 return result
1386
1387
1388""" Note: Deprecated. Legacy support for Run 2 AODs produced in release 21 or in release 22 prior to April 2021
1389"""
1391 if aux is None:
1392 return {}
1393
1394 L1Items = []
1395 HLTChains = []
1396
1397 try:
1398 interface.setStore( aux )
1399 if interface.size() > 0:
1400 # We make the assumption that the first stored SMK is
1401 # representative of all events in the input collection.
1402 firstMenu = interface.at(0)
1403 L1Items = [ _convert_value(item) for item in firstMenu.itemNames() ]
1404 HLTChains = [ _convert_value(chain) for chain in firstMenu.chainNames() ]
1405 except Exception as err: # noqa: F841
1406 msg.warn('Problem reading xAOD::TriggerMenu:')
1407
1408 result = {}
1409 result['L1Items'] = L1Items
1410 result['HLTChains'] = HLTChains
1411
1412 return result
1413
1415 result = {}
1416
1417 try:
1418 interface.setStore( aux )
1419 if interface.size() > 0:
1420 # We make the assumption that the first stored SMK is
1421 # representative of all events in the input collection.
1422 firstMenu = interface.at(0)
1423 import json
1424 decoded = json.loads(firstMenu.payload())
1425 result['RAWTriggerMenuJson'] = firstMenu.payload()
1426 result['name'] = firstMenu.name()
1427 result['dbkey'] = firstMenu.key()
1428 if decoded['filetype'] == 'hltmenu':
1429 result['HLTChains'] = [ _convert_value(chain) for chain in decoded['chains'] ]
1430 elif decoded['filetype'] == 'l1menu':
1431 result['L1Items'] = [ _convert_value(item) for item in decoded['items'] ]
1432 elif decoded['filetype'] in ['bunchgroupset', 'hltprescale', 'l1prescale', 'hltmonitoringsummary']:
1433 return result
1434
1435 else:
1436 msg.warn('Got an xAOD::TriggerMenuJson called {0} but only expecting hltmenu or l1menu'.format(decoded['filetype']))
1437 return {}
1438
1439 except Exception as err: # noqa: F841
1440 msg.warn('Problem reading xAOD::TriggerMenuJson')
1441
1442 return result
1443
1445 if 'user_type' in value:
1446 items = value['user_type'].split('#')[3:]
1447 for i in range(0, len(items), 2):
1448 value[items[i]] = _convert_value(items[i+1])
1449 return value
1450
1452
1453 types = None
1454 for key in value:
1455 if key == 'bit_mask':
1456 val = value[key]
1457
1458 bitmask_length = len(val)
1459
1460 is_simulation = False
1461 is_testbeam = False
1462 is_calibration = False
1463
1464 if bitmask_length > 0: # ROOT.EventType.IS_SIMULATION
1465 is_simulation = val[0]
1466
1467 if bitmask_length > 1: # ROOT.EventType.IS_TESTBEAM
1468 is_testbeam = val[1]
1469
1470 if bitmask_length > 2: # ROOT.EventType.IS_CALIBRATION:
1471 is_calibration = val[2]
1472
1473 types = [
1474 'IS_SIMULATION' if is_simulation else 'IS_DATA',
1475 'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
1476 'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
1477 ]
1478
1479 value['type'] = types
1480 return value
1481
1482
def make_lite(meta_dict):
    """Reduce the metadata of every file to the keys kept in 'lite' mode.

    Entries whose recorded class name matches EventStreamInfo are pruned to
    ``lite_primary_keys_to_keep`` and the '/TagInfo' entry is pruned to
    ``lite_TagInfo_keys_to_keep``. The dictionaries are modified in place.

    :param meta_dict: dict mapping each file name to its metadata dict
    :return: the same ``meta_dict``
    """
    for file_content in meta_dict.values():
        for key in file_content:
            recorded_types = file_content['metadata_items']
            if key not in recorded_types or not regexEventStreamInfo.match(recorded_types[key]):
                continue
            stream_info = file_content[key]
            # iterate over a snapshot of the keys, entries are removed on the way
            for item in list(stream_info):
                if item not in lite_primary_keys_to_keep:
                    stream_info.pop(item)

        if '/TagInfo' in file_content:
            tag_info = file_content['/TagInfo']
            for item in list(tag_info):
                if item not in lite_TagInfo_keys_to_keep:
                    tag_info.pop(item)
    return meta_dict
1498
1499
def make_peeker(meta_dict):
    """Reduce the metadata of every file to the keys kept in 'peeker' mode.

    Entries whose recorded class name matches EventStreamInfo, and the
    sections listed below, are pruned to a fixed list of keys each. The
    dictionaries are modified in place.

    :param meta_dict: dict mapping each file name to its metadata dict
    :return: the same ``meta_dict``
    """
    event_stream_info_keys = [
        'lumiBlockNumbers',
        'runNumbers',
        'mc_event_number',
        'mc_channel_number',
        'eventTypes',
        'processingTags',
        'itemList',
    ]

    # section name -> keys of that section to keep
    section_keys = {
        '/TagInfo': [
            'beam_energy',
            'beam_type',
            'GeoAtlas',
            'IOVDbGlobalTag',
            'AODFixVersion',
            'AMITag',
            'project_name',
            'triggerStreamOfFile',
            'AtlasRelease',
            'specialConfiguration',
            'mc_campaign',
            'hepmc_version',
            'generators',
            'keywords',
            'data_year',
        ],
        '/Simulation/Parameters': [
            'G4Version',
            'TruthStrategy',
            'SimBarcodeOffset',
            'RegenerationIncrement',
            'TRTRangeCut',
            'SimulationFlavour',
            'Simulator',
            'PhysicsList',
            'SimulatedDetectors',
            'IsDataOverlay',
        ],
        '/Digitization/Parameters': [
            'numberOfCollisions',
            'intraTrainBunchSpacing',
            # the comma after the next entry was missing, which fused it with
            # 'physicsList' into one string and dropped both keys
            'BeamIntensityPattern',
            'physicsList',
            'digiSteeringConf',
            'pileUp',
            'DigitizedDetectors',
        ],
        'CutBookkeepers': [
            'currentCutCycle',
            'currentCutInputStream',
        ],
        'TruthMetaData': [
            'mcChannelNumber',
            'weightNames',
        ],
    }

    def prune(section, keys_to_keep):
        # iterate over a snapshot of the keys, entries are removed on the way
        for item in list(section):
            if item not in keys_to_keep:
                section.pop(item)

    for file_content in meta_dict.values():
        for key in file_content:
            recorded_types = file_content['metadata_items']
            if key in recorded_types and regexEventStreamInfo.match(recorded_types[key]):
                prune(file_content[key], event_stream_info_keys)

        for section_name, keys_to_keep in section_keys.items():
            if section_name in file_content:
                prune(file_content[section_name], keys_to_keep)

    return meta_dict
1589
1590
def promote_keys(meta_dict, mode):
    """Move selected nested metadata up to the top level of each file's dictionary.

    For every file the first entry recorded as EventStreamInfo is merged into
    the file's dictionary and removed. Outside of a Gaudi environment the
    first recorded entry with 'FileMetaData' in its name is used to fill the
    top-level keys as well. Afterwards the '/TagInfo',
    '/Generation/Parameters', '/Simulation/Parameters',
    '/Digitization/Parameters' and 'CutBookkeepers' sections are merged into
    the top level and removed. The dictionaries are modified in place.

    :param meta_dict: dict mapping each file name to its metadata dict
    :param mode: reading mode; 'peeker' promotes some extra keys and 'lite'
                 removes the FileMetaData entry after use
    :return: the same ``meta_dict``
    """
    for filename, file_content in meta_dict.items():
        md = meta_dict[filename]
        for key in file_content:
            if key in md['metadata_items'] and regexEventStreamInfo.match(md['metadata_items'][key]):
                md.update(md[key])

                if 'eventTypes' in md and len(md['eventTypes']):
                    et = md['eventTypes'][0]
                    md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
                    if 'mc_channel_number' in et:
                        md['mc_channel_number'] = et.get('mc_channel_number', None)
                    md['eventTypes'] = et['type']

                    # For very old files
                    if 'GeoAtlas' in et:
                        md['GeoAtlas'] = et.get('GeoAtlas', None)
                    if 'IOVDbGlobalTag' in et:
                        md['IOVDbGlobalTag'] = et.get('IOVDbGlobalTag', None)

                if 'lumiBlockNumbers' in md[key]:
                    md['lumiBlockNumbers'] = md[key]['lumiBlockNumbers']

                if 'processingTags' in md[key]:
                    md['processingTags'] = md[key]['processingTags']

                meta_dict[filename].pop(key)
                # md was modified, so the iteration over it must stop here
                break

            # cheap key checks first, the environment probe only when they pass
            if key in md['metadata_items'] and 'FileMetaData' in key and not isGaudiEnv():
                fmd = md[key]

                if 'beamType' in fmd:
                    md['beam_type'] = fmd['beamType']

                if 'runNumbers' in fmd:
                    md['runNumbers'] = fmd['runNumbers']

                if 'mcProcID' in fmd:
                    md['mc_channel_number'] = int(fmd['mcProcID'])

                if 'mcCampaign' in fmd:
                    md['mc_campaign'] = fmd['mcCampaign']

                if 'dataYear' in fmd:
                    md['data_year'] = int(fmd['dataYear'])

                if 'lumiBlocks' in fmd:
                    md['lumiBlockNumbers'] = fmd['lumiBlocks']

                if mode == 'peeker' and 'amiTag' in fmd:
                    md['AMITag'] = fmd['amiTag']

                if 'beamEnergy' in fmd:
                    md['beam_energy'] = int(fmd['beamEnergy'])

                if 'geometryVersion' in fmd:
                    md['GeoAtlas'] = fmd['geometryVersion']

                # EventType checks
                md['eventTypes'] = []
                if mode == 'peeker' and 'simFlavour' in fmd:
                    md['SimulationFlavour'] = fmd['simFlavour']

                if mode == 'peeker' and 'isDataOverlay' in fmd:
                    md['IsDataOverlay'] = fmd['isDataOverlay']

                if 'dataType' in fmd:
                    md['processingTags'] = [fmd['dataType']]

                # dataType may be absent: treat that as "not a DAOD_TRUTH file"
                # instead of failing with a KeyError
                is_truth = 'DAOD_TRUTH' in fmd.get('dataType', '')

                if (
                    ('simFlavour' in fmd and ('FullG4' in fmd['simFlavour'] or 'ATLFAST' in fmd['simFlavour']))
                    or is_truth
                ):
                    md['eventTypes'].append('IS_SIMULATION')
                else:
                    md['eventTypes'].append('IS_DATA')

                if ('GeoAtlas' in md and 'ATLAS' in md['GeoAtlas']) or is_truth:
                    md['eventTypes'].append('IS_ATLAS')
                    # this is probably safe to assume for all files used in AnalysisBase
                    md['eventTypes'].append('IS_PHYSICS')
                else:
                    md['eventTypes'].append('IS_TESTBEAM')

                if mode == 'peeker':
                    if 'productionRelease' in fmd:
                        md['AtlasRelease'] = fmd['productionRelease']

                    if 'generatorsInfo' in fmd:
                        md['generators'] = fmd['generatorsInfo']

                if mode == 'lite':
                    meta_dict[filename].pop(key)
                # md was modified, so the iteration over it must stop here
                break

        # flatten the remaining sections into the top level
        for section in (
            '/TagInfo',
            '/Generation/Parameters',
            '/Simulation/Parameters',
            '/Digitization/Parameters',
            'CutBookkeepers',
        ):
            if section in file_content:
                md.update(md[section])
                md.pop(section)

    return meta_dict
1709
1710
def convert_itemList(metadata, layout):
    """
    Rearrange the itemList values to match the format of 'eventdata_items',
    'eventdata_itemsList' or 'eventdata_itemsDic' generated with the legacy
    file peeker tool.

    The list is taken from the top-level 'itemList' key if present, otherwise
    from the first entry recorded in 'metadata_items' as 'EventStreamInfo_p3'.

    :param metadata: a dictionary obtained using read_metadata method.
                     The mode for read_metadata must be 'peeker' or 'full'
    :param layout: the mode in which the data will be converted:
                * for 'eventdata_items' use: layout= None
                * for 'eventdata_itemsList' use: layout= '#join'
                * for 'eventdata_itemsDic' use: layout= 'dict'
    :return: the converted item list; None if no item list is found or the
             layout is not one of the above
    """
    # Find the itemsList:
    if 'itemList' in metadata:
        item_list = metadata['itemList']
    else:
        item_list = None
        for key in metadata:
            if ('metadata_items' in metadata
                    and key in metadata['metadata_items']
                    and metadata['metadata_items'][key] == 'EventStreamInfo_p3'):
                item_list = metadata[key]['itemList']
                break

    if item_list is None:
        return None

    if layout is None:
        return item_list

    if layout == '#join':
        # entries with an empty class name are left out
        return [class_name + '#' + sg_key for class_name, sg_key in item_list if class_name]

    if layout == 'dict':
        by_class = {}
        for class_name, sg_key in item_list:
            by_class.setdefault(class_name, []).append(sg_key)
        return by_class

    return None
1756
1757
def dataheader_nentries(infile):
    """Extract number of entries from DataHeader.

    :param infile: ROOT TFile object or filename string. A filename is opened
                   here and closed again before returning; a TFile passed in
                   by the caller is left open.
    :return: number of entries as returned by the DataHeader object in infile,
             None in absence of a DataHeader object
    :raises RuntimeError: a non-TTree DataHeader object was found but the ROOT
                          version is older than the one needed for RNTuple
    :raises NotImplementedError: the DataHeader object is neither a TTree nor
                                 an RNTuple
    """
    import ROOT
    from PyUtils.PoolFile import PoolOpts

    # Remember whether the file handle is ours, so that only a handle opened
    # in this function gets closed at the end.
    opened_here = not isinstance(infile, ROOT.TFile)
    if opened_here:
        infile = ROOT.TFile.Open(infile)

    try:
        # set literal: identical TTree/RNTuple names collapse into one lookup
        for name in {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}:
            obj = infile.Get(name)
            msg.debug(f"dataheader_nentries: {name=}, {obj=}, {type(obj)=}")
            if not obj:
                continue

            if isinstance(obj, ROOT.TTree):
                return obj.GetEntriesFast()

            # check early to avoid scary ROOT read errors
            if ROOT.gROOT.GetVersionInt() < 63100:
                raise RuntimeError("ROOT ver. 6.31/01 or greater needed to read RNTuple files")

            if not isRNTuple(obj):
                raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")

            # the reader class is looked up under ROOT.Experimental first,
            # falling back to the top-level ROOT namespace
            try:
                return ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
            except AttributeError:
                return ROOT.RNTupleReader.Open(obj).GetNEntries()

        # no DataHeader object under either name
        return None
    finally:
        # the entry count is a plain number by now, so the file can go
        if opened_here and infile:
            infile.Close()
1788
def get_meta_filter(mode="lite", meta_key_filter=None) -> dict:
    """Build the metadata filter table for the given reading mode.

    The result maps a metadata object name (or '*') to a class-name pattern.

    :param mode: 'lite', 'peeker', 'full' or 'iov'. Any other value yields an
                 empty filter.
    :param meta_key_filter: optional list of keys; only used for the 'full'
                            and 'iov' modes, where each key is mapped to '*'.
    :return: dictionary of filters; empty when nothing applies.
    """

    if meta_key_filter is None:
        meta_key_filter = []

    # class names that appear many times in the tables below
    iov_container = 'IOVMetaDataContainer_p1'
    fmd_aux = 'xAOD::FileMetaDataAuxInfo_v1'
    menu_r2 = 'DataVector<xAOD::TriggerMenu_v1>'
    menu_r2_aux = 'xAOD::TriggerMenuAuxContainer_v1'
    menu_json = 'DataVector<xAOD::TriggerMenuJson_v1>'
    menu_json_aux = 'xAOD::TriggerMenuJsonAuxContainer_v1'

    # filters used for the lite version
    if mode == 'lite':
        if isGaudiEnv():
            return {
                '/TagInfo': iov_container,
                'IOVMetaDataContainer_p1__TagInfo': iov_container,
                '*': 'EventStreamInfo_p*',
            }
        return {
            'FileMetaData': '*',
            'FileMetaDataAux.': fmd_aux,
        }

    # filters used for the peeker version
    if mode == 'peeker':
        meta_filter = {
            # R2 trigger metadata format AOD (deprecated)
            'TriggerMenu': menu_r2,
            'TriggerMenuAux.': menu_r2_aux,
            # R2 trigger metadata format ESD (deprecated)
            'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu': menu_r2,
            'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.': menu_r2_aux,
            # R3 trigger metadata format AOD
            'TriggerMenuJson_HLT': menu_json,
            'TriggerMenuJson_HLTAux.': menu_json_aux,
            'TriggerMenuJson_HLTMonitoring': menu_json,
            'TriggerMenuJson_HLTMonitoringAux.': menu_json_aux,
            'TriggerMenuJson_HLTPS': menu_json,
            'TriggerMenuJson_HLTPSAux.': menu_json_aux,
            'TriggerMenuJson_L1': menu_json,
            'TriggerMenuJson_L1Aux.': menu_json_aux,
            'TriggerMenuJson_L1PS': menu_json,
            'TriggerMenuJson_L1PSAux.': menu_json_aux,
            'CutBookkeepers': 'xAOD::CutBookkeeperContainer_v1',
            'CutBookkeepersAux.': 'xAOD::CutBookkeeperAuxContainer_v1',
            'FileMetaData': '*',
            'FileMetaDataAux.': fmd_aux,
            'TruthMetaData': '*',
            'TruthMetaDataAux.': 'xAOD::TruthMetaDataAuxContainer_v1',
            # R3 trigger metadata format ESD
            'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT': menu_json,
            'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux.': menu_json_aux,
            'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring': menu_json,
            'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTMonitoringAux.': menu_json_aux,
            'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS': menu_json,
            'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTPSAux.': menu_json_aux,
            'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1': menu_json,
            'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1Aux.': menu_json_aux,
            'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS': menu_json,
            'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1PSAux.': menu_json_aux,
        }

        # extra entries added only when isGaudiEnv() reports true
        if isGaudiEnv():
            meta_filter.update({
                '/TagInfo': iov_container,
                'IOVMetaDataContainer_p1__TagInfo': iov_container,
                '/Simulation/Parameters': iov_container,
                '/Digitization/Parameters': iov_container,
                '/EXT/DCS/MAGNETS/SENSORDATA': iov_container,
                '*': 'EventStreamInfo_p*',
            })

        return meta_filter

    # full / iov: an explicit key selection becomes the whole filter
    if mode in ('full', 'iov') and meta_key_filter:
        return {f: '*' for f in meta_key_filter}

    return {}
1863
def denormalize_metadata_types(metadata_dict):
    """
    Convert canonical/C++ STL types in the metadata_items dictionary back to their
    ROOT equivalents for backward compatibility.
      - 'float'                 => 'Float_t'
      - 'char'                  => 'Char_t'
      - 'std::string'           => 'string'
      - 'std::uint32_t'         => 'UInt_t'
      - 'xAOD::FileMetaData_v1' => 'FileMetaData'
      (add more as needed)

    A value is rewritten when it is exactly one of the names above, or when it
    ends in '.<name>'; in the latter case only that last component is replaced.
    Every other value is copied unchanged.

    :param metadata_dict: dictionary whose values are type-name strings
    :return: a new dictionary with the same keys; the input is not modified
    """
    type_map = {
        "float": "Float_t",
        "char": "Char_t",
        "std::string": "string",
        "std::uint32_t": "UInt_t",
        "xAOD::FileMetaData_v1": "FileMetaData",
    }
    denormalized = {}
    for k, v in metadata_dict.items():
        if v in type_map:
            # whole value is a known type name
            denormalized[k] = type_map[v]
            continue

        # None of the mapped names contains a '.', so "ends with '.<name>'"
        # is the same as "the part after the last '.' is <name>".
        prefix, dot, last = v.rpartition(".")
        if dot and last in type_map:
            denormalized[k] = prefix + "." + type_map[last]
        else:
            denormalized[k] = v
    return denormalized
1892
1893
def should_keep_meta(normalizedName, typeName, meta_filter):
    """
    Decide whether a metadata object passes meta_filter.

    An empty filter keeps everything. Otherwise the object is kept when at
    least one filter entry matches: the entry's key is '*' or is contained in
    normalizedName (with '/' and '_' treated alike), and typeName matches the
    entry's class pattern (case-sensitive shell-style wildcards). Entries
    whose key contains 'CutBookkeepers' additionally require the key to be
    exactly normalizedName.

    :param normalizedName: name of the metadata object
    :param typeName: class name of the metadata object
    :param meta_filter: dictionary mapping name filters to class patterns
    :return: True when the object should be kept, False otherwise
    """
    if len(meta_filter) == 0:
        return True

    # the object name does not change between entries: flatten it once
    flat_name = normalizedName.replace("/", "_")

    for name_filter, class_pattern in meta_filter.items():
        name_matches = name_filter == "*" or name_filter.replace("/", "_") in flat_name
        if not (name_matches and fnmatchcase(typeName, class_pattern)):
            continue

        # CutBookkeepers entries only accept the exact container name;
        # a near miss falls through to the remaining entries
        if "CutBookkeepers" not in name_filter or name_filter == normalizedName:
            return True

    return False
STL class.
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:312
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179
promote_keys(meta_dict, mode)
_extract_fields_esi(value)
denormalize_metadata_types(metadata_dict)
_extract_fields_cbk(interface=None, aux=None)
dataheader_nentries(infile)
should_keep_meta(normalizedName, typeName, meta_filter)
_extract_fields_fmd(interface=None, aux=None)
_get_attribute_val(iov_container, attr_name, attr_idx)
_extract_fields_ef(value)
make_lite(meta_dict)
_extract_fields_iovmdc(value)
_extract_iov_detailed(iov_container)
_extract_fields_iov(iov_container, idx_range)
_extract_fields_tmd(interface=None, aux=None)
_convert_value(value, aux=None)
convert_itemList(metadata, layout)
_extract_fields_triggermenujson(interface, aux)
_get_pfn(filename)
read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition MetaReader.py:71
_convert_event_type_user_type(value)
_read_guid(filename)
make_peeker(meta_dict)
_convert_event_type_bitmask(value)
dict get_meta_filter(mode="lite", meta_key_filter=None)
_extract_fields_triggermenu(interface, aux)