ATLAS Offline Software
Loading...
Searching...
No Matches
MetaReader.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3import os
4import re
5from fnmatch import fnmatchcase
6from AthenaCommon.Logging import logging
7from AthenaConfiguration.AthConfigFlags import isGaudiEnv
8from PyUtils.PoolFile import isRNTuple
9from ROOT import gSystem
10from AthenaConfiguration.Enums import Project
11
12
# Module-level logger for MetaReader.
msg = logging.getLogger("MetaReader")

# All patterns below are compiled once at import time (rather than inside
# _convert_value() or the per-branch loops) so that matching does not pay
# for repeated compilation.

# --- persistent (T/P separated) classes: optional "_p<N>" version suffix ---
regexEventStreamInfo = re.compile(r"^EventStreamInfo(_p\d+)?$")
regexIOVMetaDataContainer = re.compile(r"^IOVMetaDataContainer(_p\d+)?$")
regexByteStreamMetadataContainer = re.compile(r"^ByteStreamMetadataContainer(_p\d+)?$")

# --- xAOD classes: optional "_v<N>" version suffix ---
regexXAODCutBookkeeperContainer = re.compile(r"^xAOD::CutBookkeeperContainer(_v\d+)?$")
regexXAODCutBookkeeperContainerAux = re.compile(r"^xAOD::CutBookkeeperAuxContainer(_v\d+)?$")
regexXAODEventFormat = re.compile(r"^xAOD::EventFormat(_v\d+)?$")
regexXAODFileMetaData = re.compile(r"^xAOD::FileMetaData(_v\d+)?$")
regexXAODFileMetaDataAux = re.compile(r"^xAOD::FileMetaDataAuxInfo(_v\d+)?$")
# Dynamic FileMetaData attributes; matched against the branch/field *name*,
# e.g. "FileMetaDataAuxDyn.<attribute>".
regexXAODFileMetaDataAuxDyn = re.compile(r"^(xAOD::)?FileMetaData.*AuxDyn(\.[a-zA-Z0-9]+)?$")

# Trigger menu containers: Run 2 format ...
regexXAODTriggerMenu = re.compile(r"^DataVector<xAOD::TriggerMenu(_v\d+)?>$")
regexXAODTriggerMenuAux = re.compile(r"^xAOD::TriggerMenuAuxContainer(_v\d+)?$")
# ... and Run 3 (JSON) format.
regexXAODTriggerMenuJson = re.compile(r"^DataVector<xAOD::TriggerMenuJson(_v\d+)?>$")
regexXAODTriggerMenuJsonAux = re.compile(r"^xAOD::TriggerMenuJsonAuxContainer(_v\d+)?$")

regexXAODTruthMetaData = re.compile(r"^DataVector<xAOD::TruthMetaData(_v\d+)?>$")
regexXAODTruthMetaDataAux = re.compile(r"^xAOD::TruthMetaDataAuxContainer(_v\d+)?$")

# --- generic helpers ---
# C++ type name (group 1) with optional template arguments (group 2).
regex_cppname = re.compile(r"^([\w:]+)(<.*>)?$")
# Possibly namespace-qualified class name ending in a "_p<N>" or "_v<N>" suffix.
regex_persistent_class = re.compile(r"^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$")
# "<scheme>:..." names that contain ".RAW." / ".DRAW." or end in ".data";
# used to classify non-local inputs as bytestream.
regex_BS_files = re.compile(r"^(\w+):.*((\.D?RAW\..*)|(\.data$))")
# Leading "<scheme>:" of a URI-like file name (scheme captured in group 1).
regex_URI_scheme = re.compile(r"^([A-Za-z0-9\+\.\-]+)\:")
36
# Top-level metadata keys kept in the 'lite' output.
lite_primary_keys_to_keep = [
    "lumiBlockNumbers",
    "runNumbers",
    "mc_event_number",
    "mc_channel_number",
    "eventTypes",
    "processingTags",
    "itemList",
]

# '/TagInfo' entries kept in the 'lite' output.
lite_TagInfo_keys_to_keep = [
    "beam_energy",
    "beam_type",
    "GeoAtlas",
    "IOVDbGlobalTag",
    "AODFixVersion",
    "project_name",
    "mc_campaign",
    "keywords",
]
43
# Metadata keys that carry trigger configuration, in every naming variant
# listed for them: plain container names, IOV folder paths and fully
# type-qualified container names.
trigger_keys = [
    # plain container names
    "TriggerConfigInfo",
    "TriggerMenu",
    "TriggerMenuJson_BG",
    "TriggerMenuJson_HLT",
    "TriggerMenuJson_HLTMonitoring",
    "TriggerMenuJson_HLTPS",
    "TriggerMenuJson_L1",
    "TriggerMenuJson_L1PS",
    # HLT folders
    "/TRIGGER/HLT/Groups",
    "/TRIGGER/HLT/HltConfigKeys",
    "/TRIGGER/HLT/Menu",
    "/TRIGGER/HLT/PrescaleKey",
    "/TRIGGER/HLT/Prescales",
    # LVL1 folders
    "/TRIGGER/LVL1/ItemDef",
    "/TRIGGER/LVL1/Lvl1ConfigKey",
    "/TRIGGER/LVL1/Menu",
    "/TRIGGER/LVL1/Prescales",
    "/TRIGGER/LVL1/Thresholds",
    # type-qualified container names
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenu",
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG",
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT",
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring",
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS",
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1",
    "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS",
]
53
# Maps a TriggerMenuJson aux-store name (ending in "Aux:") to the key of the
# matching "DataVector<xAOD::TriggerMenuJson_v1>_..." interface container.
# Every menu part is registered twice: first with the
# "xAOD__TriggerMenuJsonAuxContainer_v1_" type prefix, then without it.
# Insertion order is significant: a reverse lookup by value that takes the
# first hit must find the type-prefixed key before the bare one.
trigger_menu_json_map = {
    f"{type_prefix}TriggerMenuJson_{menu_part}Aux:":
        f"DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_{menu_part}"
    for type_prefix in ("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
    for menu_part in ("L1PS", "BG", "HLT", "HLTMonitoring", "HLTPS", "L1")
}
68
69
70def read_metadata(filenames, file_type = None, mode = 'lite', promote = None, meta_key_filter = None,
71 unique_tag_info_values = True, ignoreNonExistingLocalFiles=False):
72 """
73 This tool is independent of Athena framework and returns the metadata from a given file.
74 :param filenames: the input file from which metadata needs to be extracted.
75 :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files.
76 :param mode: if true, will return all metadata associated with the filename. By default, is false and this will
77 return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
78 :return: a dictionary of metadata for the given input file.
79 """
80
81 # make the mode and run available in the _convert methods
82 global _gbl_mode, _gbl_run
83 _gbl_mode = mode
84 _gbl_run = None
85
86 from RootUtils import PyROOTFixes # noqa F401
87
88 # Check if the input is a file or a list of files.
89 if isinstance(filenames, str):
90 filenames = [filenames]
91
92 # Check if file_type is an allowed value
93 if file_type is not None:
94 if file_type not in ('POOL', 'BS'):
95 raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
96 else:
97 msg.info('Forced file_type: {0}'.format(file_type))
98
99 # Check the value of mode parameter
100 if mode not in ('tiny', 'lite', 'full', 'peeker', 'iov'):
101 raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker", "iov" or "full"')
102
103 if meta_key_filter is None:
104 meta_key_filter = []
105
106 # Disable 'full' and 'iov' in non-Gaudi environments
107 if not isGaudiEnv():
108 if mode in ('full', 'iov'):
109 raise NameError('The following modes are not available in AnalysisBase: "iov" and "full"')
110
111 msg.info('Current mode used: {0}'.format(mode))
112 msg.info('Current filenames: {0}'.format(filenames))
113
114 if mode != 'full' and mode !='iov' and len(meta_key_filter) > 0:
115 raise NameError('It is possible to use the meta_key_filter option only for full mode')
116 if meta_key_filter:
117 msg.info('Filter used: {0}'.format(meta_key_filter))
118
119 # create the storage object for metadata.
120 meta_dict = {}
121
122 # ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
123 for filename in filenames:
124 meta_dict[filename] = {}
125 current_file_type = None
126 # reset the global Run for each file to not propagate and old value
127 _gbl_run = None
128 # Determine the file_type of the input and store this information into meta_dict
129 if not file_type:
130 if os.path.isfile(filename):
131
132 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # Attention, bizarre convention of return value!!
133 msg.warning('Ignoring not accessible file: {}'.format(filename))
134 continue
135
136 with open(filename, 'rb') as binary_file:
137 magic_file = binary_file.read(4)
138
139 if magic_file == 'root' or magic_file == b'root':
140 current_file_type = 'POOL'
141 meta_dict[filename]['file_type'] = 'POOL'
142
143 elif Project.determine() in (
144 Project.AnalysisBase, Project.AthAnalysis):
145 raise RuntimeError(
146 f"{filename} is not a ROOT file, assumed bytestream"
147 ", this is not supported in Analysis releases")
148 else:
149 current_file_type = 'BS'
150 meta_dict[filename]['file_type'] = 'BS'
151
152 # add information about the file_size of the input filename
153 meta_dict[filename]['file_size'] = os.path.getsize(filename)
154
155 # determine the file type for the remote input files
156 else:
157 if regex_BS_files.match(filename):
158 current_file_type = 'BS'
159 meta_dict[filename]['file_type'] = 'BS'
160 else:
161 current_file_type = 'POOL'
162 meta_dict[filename]['file_type'] = 'POOL'
163
164 # add information about the file_size of the input filename
165 meta_dict[filename]['file_size'] = None # None -> we can't read the file size for a remote file
166
167 else:
168 current_file_type = file_type
169
170 # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
171 if current_file_type == 'POOL':
172
173 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # Attention, bizarre convention of return value!!
174 msg.warning('Ignoring not accessible file: {}'.format(filename))
175 continue
176
177 import ROOT
178 # open the file using ROOT.TFile
179 current_file = ROOT.TFile.Open( _get_pfn(filename) )
180
181 # get auto flush setting from the main EventData TTree
182 from PyUtils.PoolFile import PoolOpts
183 collectionTree = current_file.Get(PoolOpts.TTreeNames.EventData)
184 if isinstance(collectionTree, ROOT.TTree):
185 meta_dict[filename]['auto_flush'] = collectionTree.GetAutoFlush()
186
187 # read and add the 'GUID' value
188 meta_dict[filename]['file_guid'] = _read_guid(filename)
189
190 # read and add compression level and algorithm
191 meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
192 meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()
193
194 if (
195 isRNTuple(md:=current_file.Get(PoolOpts.RNTupleNames.MetaData))
196 and mode != "tiny"
197 ):
198 msg.warning(
199 "Reading in-file metadata from RNTuple is currently of limited support"
200 )
201 meta_dict[filename]["metadata_items"] = {}
202
203 try:
204 from ROOT import RNTupleReader
205 except ImportError:
206 from ROOT.Experimental import RNTupleReader
207
208 reader = RNTupleReader.Open(md)
209 entry = reader.CreateEntry()
210 reader.LoadEntry(0, entry)
211 auxes = {}
212 classes_with_aux = {
213 "xAOD::FileMetaData_v1",
214 "xAOD::FileMetaDataAuxInfo_v1",
215 "xAOD::TriggerMenuJsonAuxContainer_v1",
216 "DataVector<xAOD::TriggerMenuJson_v1>",
217 "xAOD::TruthMetaDataAuxContainer_v1",
218 "DataVector<xAOD::TruthMetaData_v1>",
219 "xAOD::CutBookkeeperContainer_v1",
220 "xAOD::CutBookkeeperAuxContainer_v1",
221 "xAOD::LumiBlockRangeAuxContainer_v1",
222 "DataVector<xAOD::LumiBlockRange_v1>",
223 }
224
225 dynamic_fmd_items = {}
226
227 meta_filter = get_meta_filter(mode, meta_key_filter)
228
229 for field in reader.GetDescriptor().GetTopLevelFields():
230 normalizedName = field.GetFieldName()
231 if "index_ref" in normalizedName:
232 continue
233 if regexIOVMetaDataContainer.match(field.GetTypeName()):
234 # if field name is e.g. IOVMetaDataContainer_p1__Digitization_Parameters,
235 # strip the prefix and change underscore to slash to slash
236 normalizedName = (
237 field.GetFieldName()
238 .replace("IOVMetaDataContainer_p1_", "")
239 .replace("_", "/")
240 )
241 meta_dict[filename]["metadata_items"][normalizedName] = (
242 "IOVMetaDataContainer"
243 )
244 elif regexByteStreamMetadataContainer.match(field.GetTypeName()):
245 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
246 "ByteStreamMetadataContainer"
247 )
248 elif regexEventStreamInfo.match(field.GetTypeName()):
249 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
250 "EventStreamInfo"
251 )
252 elif regexXAODFileMetaData.match(field.GetTypeName()):
253 meta_dict[filename]["metadata_items"][
254 field.GetFieldName().replace("xAOD__", "xAOD::")
255 ] = field.GetTypeName()
256 elif regexXAODFileMetaDataAuxDyn.match(
257 normalizedName := field.GetFieldName()
258 .replace("xAOD__", "xAOD::")
259 .replace("AuxDyn:", "AuxDyn.")
260 ):
261 result = (
262 False
263 if entry[field.GetFieldName()] == "\x00"
264 else entry[field.GetFieldName()]
265 )
266 dynamic_fmd_items[normalizedName.split(".")[1]] = result
267 meta_dict[filename]["metadata_items"][normalizedName] = (
268 field.GetTypeName()
269 )
270 continue
271 elif regexXAODFileMetaDataAux.match(field.GetTypeName()):
272 meta_dict[filename]["metadata_items"][
273 field.GetFieldName()
274 .replace("xAOD__", "xAOD::")
275 .replace("Aux:", "Aux.")
276 ] = field.GetTypeName()
277 elif regexXAODTruthMetaData.match(field.GetTypeName()):
278 meta_dict[filename]["metadata_items"][
279 field.GetFieldName()
280 .replace("xAOD__", "xAOD::")
281 .replace("DataVector_", "DataVector<")
282 .replace("__Truth", ">_Truth")
283 ] = "TruthMetaData"
284 elif regexXAODTruthMetaDataAux.match(field.GetTypeName()):
285 meta_dict[filename]["metadata_items"][
286 field.GetFieldName()
287 .replace("xAOD__", "xAOD::")
288 .replace("Aux:", "Aux.")
289 ] = field.GetTypeName()
290 elif regexXAODEventFormat.match(field.GetTypeName()):
291 meta_dict[filename]["metadata_items"][
292 field.GetFieldName().replace("xAOD__", "xAOD::")
293 ] = field.GetTypeName()
294 elif regexXAODTriggerMenuJson.match(field.GetTypeName()):
295 meta_dict[filename]["metadata_items"][
296 field.GetFieldName()
297 .replace("xAOD__", "xAOD::")
298 .replace("DataVector_", "DataVector<")
299 .replace("__Trigger", ">_Trigger")
300 ] = field.GetTypeName()
301 elif regexXAODTriggerMenuJsonAux.match(field.GetTypeName()):
302 meta_dict[filename]["metadata_items"][
303 field.GetFieldName()
304 .replace("xAOD__", "xAOD::")
305 .replace("Aux:", "Aux.")
306 ] = field.GetTypeName()
307 elif regexXAODCutBookkeeperContainer.match(field.GetTypeName()):
308 meta_dict[filename]["metadata_items"][
309 field.GetFieldName()
310 .replace("xAOD__", "xAOD::")
311 .replace("DataVector_", "DataVector<")
312 .replace("__CutBookkeeper", ">_CutBookkeeper")
313 ] = field.GetTypeName()
314 elif regexXAODCutBookkeeperContainerAux.match(field.GetTypeName()):
315 meta_dict[filename]["metadata_items"][
316 field.GetFieldName()
317 .replace("xAOD__", "xAOD::")
318 .replace("Aux:", "Aux.")
319 ] = field.GetTypeName()
320 else:
321 meta_dict[filename]["metadata_items"][
322 field.GetFieldName().replace("Aux:", "Aux.")
323 ] = field.GetTypeName()
324
325 if field.GetTypeName() in classes_with_aux:
326 # handle aux classes later
327 auxes[field.GetFieldName()] = field.GetTypeName()
328 continue
329
330 if not should_keep_meta(
331 normalizedName, field.GetTypeName(), meta_filter
332 ):
333 continue
334
335 try:
336 meta_dict[filename][normalizedName] = _convert_value(
337 entry[field.GetFieldName()]
338 )
339 except KeyError:
340 msg.warning(f"missing type {field.GetTypeName()}")
341
342 meta_dict[filename]["metadata_items"] = denormalize_metadata_types(
343 meta_dict[filename]["metadata_items"]
344 )
345
346 def _get_aux_base(aux_key: str) -> str:
347 # Remove known prefixes
348 key = aux_key
349 key = key.replace("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
350 key = key.replace("xAOD__FileMetaDataAuxInfo_v1_", "")
351 key = key.replace("xAOD__TruthMetaDataAuxContainer_v1_", "")
352 # Remove known suffixes
353 if key.endswith("Aux:"):
354 key = key[:-4]
355 elif key.endswith("Aux"):
356 key = key[:-3]
357 # Remove any trailing ':' or '_'
358 key = key.strip("_:")
359 return key
360
361 def _get_main_base(main_key: str) -> str:
362 main_base = main_key
363 # For DataVectors
364 if main_key.startswith("DataVector_xAOD__TriggerMenuJson_v1__"):
365 main_base = main_key.replace(
366 "DataVector_xAOD__TriggerMenuJson_v1__", ""
367 )
368 # For FileMetaData
369 elif main_key.startswith("xAOD__FileMetaData_v1_"):
370 main_base = main_key.replace("xAOD__FileMetaData_v1_", "")
371 # For TruthMetaData
372 elif main_key.startswith("DataVector_xAOD__TruthMetaData_v1__"):
373 main_base = main_key.replace(
374 "DataVector_xAOD__TruthMetaData_v1__", ""
375 )
376 return main_base
377
378 def _find_associated_pairs(auxes: dict) -> list[tuple[str, str]]:
379 # Build lookup tables
380 aux_map = {}
381 for k in auxes:
382 if "Aux" in k:
383 aux_map[_get_aux_base(k)] = k
384
385 main_map = {}
386 for k in auxes:
387 base = _get_main_base(k)
388 if base:
389 main_map[base] = k
390
391 # Find pairs
392 pairs = []
393 for base, aux_key in aux_map.items():
394 if base in main_map:
395 pairs.append((aux_key, main_map[base]))
396 return pairs
397
398 for pair in _find_associated_pairs(auxes):
399 return_obj = _convert_value(
400 entry[pair[1]],
401 entry[pair[0]],
402 )
403 key = next(
404 (
405 k
406 for k, v in trigger_menu_json_map.items()
407 if v
408 == pair[1]
409 .replace("xAOD__", "xAOD::")
410 .replace("DataVector_", "DataVector<")
411 .replace("__Trigger", ">_Trigger")
412 ),
413 auxes[pair[0]],
414 )
415
416 try:
417 key = (
418 key.replace("xAOD__", "xAOD::")
419 if key.count("_") <= 1
420 else key.replace("xAOD__", "xAOD::").rsplit("_", 2)[0]
421 )
422 except IndexError:
423 pass
424
425 if not should_keep_meta(
426 pair[0]
427 .replace("xAOD__", "xAOD::")
428 .replace("DataVector_", "DataVector<")
429 .replace("__Trigger", ">_Trigger")
430 .replace("Aux:", "Aux."),
431 key,
432 meta_filter,
433 ):
434 continue
435
436 if "TriggerMenuJson" in pair[0]:
437 if "RAWTriggerMenuJson" in return_obj:
438 key = (
439 pair[1]
440 if pair[0].startswith("Trigger")
441 else trigger_menu_json_map[pair[0]]
442 )
443 meta_dict[filename][key] = return_obj["RAWTriggerMenuJson"]
444 del return_obj["RAWTriggerMenuJson"]
445 if "TriggerConfigInfo" not in meta_dict[filename]:
446 meta_dict[filename]["TriggerConfigInfo"] = {}
447 if "dbkey" in return_obj:
448 meta_dict[filename]["TriggerConfigInfo"][
449 pair[0].split("_")[-1].replace("Aux:", "")
450 ] = {"key": return_obj["dbkey"], "name": return_obj["name"]}
451 del return_obj["dbkey"]
452 del return_obj["name"]
453 if "TriggerMenu" not in meta_dict[filename]:
454 meta_dict[filename]["TriggerMenu"] = {}
455 meta_dict[filename]["TriggerMenu"].update(return_obj)
456 elif "FileMetaData" in pair[0]:
457 if "FileMetaData" not in meta_dict[filename]:
458 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
459 meta_dict[filename]["FileMetaData"].update(return_obj)
460 elif "TruthMetaData" in pair[0]:
461 if pair == ("TruthMetaDataAux:", "TruthMetaData"):
462 if "TruthMetaData" not in meta_dict[filename]:
463 meta_dict[filename]["TruthMetaData"] = {}
464 meta_dict[filename]["TruthMetaData"].update(return_obj)
465 else:
466 # for backward compatibility
467 meta_dict[filename][
468 pair[1]
469 .replace("xAOD__", "xAOD::")
470 .replace("DataVector_", "DataVector<")
471 .replace("__Truth", ">_Truth")
472 ] = {}
473 meta_dict[filename][
474 pair[0]
475 .replace("xAOD__", "xAOD::")
476 .replace("Aux:", "Aux.")
477 ] = {}
478 elif pair == ("CutBookkeepersAux:", "CutBookkeepers"):
479 meta_dict[filename]["CutBookkeepers"] = return_obj
480
481 msg.debug(f"Read metadata from RNTuple: {meta_dict[filename]}")
482
483 else:
484 # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
485 if mode != 'tiny':
486 # selecting from all tree the only one which contains metadata, respectively "MetaData"
487 metadata_tree = current_file.Get('MetaData')
488 # read all list of branches stored in "MetaData" tree
489 metadata_branches = metadata_tree.GetListOfBranches()
490 nr_of_branches = metadata_branches.GetEntriesFast()
491
492 # object to store the names of metadata containers and their corresponding class name.
493 meta_dict[filename]['metadata_items'] = {}
494
495 meta_filter = get_meta_filter(mode, meta_key_filter)
496
497 # store all persistent classes for metadata container existing in a POOL/ROOT file.
498 persistent_instances = {}
499 dynamic_fmd_items = {}
500
501 # Protect non-Gaudi environments from meta-data classes it doesn't know about
502 if not isGaudiEnv():
503 metadata_tree.SetBranchStatus("*", False)
504
505 for i in range(0, nr_of_branches):
506 branch = metadata_branches.At(i)
507 name = branch.GetName()
508 if name == 'index_ref':
509 # skip the index branch
510 continue
511
512 class_name = branch.GetClassName()
513
514 if regexIOVMetaDataContainer.match(class_name):
515 name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')
516
517 if regexIOVMetaDataContainer.match(class_name):
518 meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
519 elif regexByteStreamMetadataContainer.match(class_name):
520 meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
521 elif regexEventStreamInfo.match(class_name):
522 meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
523 elif regexXAODFileMetaData.match(class_name):
524 meta_dict[filename]['metadata_items'][name] = 'FileMetaData'
525 elif regexXAODTruthMetaData.match(class_name):
526 meta_dict[filename]['metadata_items'][name] = 'TruthMetaData'
527 else:
528 type_name = class_name
529 if not type_name:
530 try:
531 type_name = branch.GetListOfLeaves()[0].GetTypeName()
532 except IndexError:
533 pass
534 meta_dict[filename]['metadata_items'][name] = type_name
535
536 if len(meta_filter) > 0:
537 keep = False
538 for filter_key, filter_class in meta_filter.items():
539 if (filter_key.replace('/', '_') in name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
540 if 'CutBookkeepers' in filter_key:
541 keep = filter_key == name
542 if keep:
543 break
544 else:
545 keep = True
546 break
547
548 if not keep:
549 continue
550 else:
551 # CutBookkeepers should always be filtered:
552 if 'CutBookkeepers' in name and name not in ['CutBookkeepers', 'CutBookkeepersAux.']:
553 continue
554
555 if not isGaudiEnv():
556 metadata_tree.SetBranchStatus(f"{name}*", True)
557
558 # assign the corresponding persistent class based of the name of the metadata container
559 if regexEventStreamInfo.match(class_name):
560 if class_name.endswith('_p1'):
561 persistent_instances[name] = ROOT.EventStreamInfo_p1()
562 elif class_name.endswith('_p2'):
563 persistent_instances[name] = ROOT.EventStreamInfo_p2()
564 else:
565 persistent_instances[name] = ROOT.EventStreamInfo_p3()
566 elif regexIOVMetaDataContainer.match(class_name):
567 persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
568 elif regexXAODEventFormat.match(class_name):
569 persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
570 elif regexXAODTriggerMenu.match(class_name) and _check_project() not in ['AthGeneration']:
571 persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
572 elif regexXAODTriggerMenuAux.match(class_name) and _check_project() not in ['AthGeneration']:
573 persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()
574 elif regexXAODTriggerMenuJson.match(class_name) and _check_project() not in ['AthGeneration']:
575 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonContainer_v1()
576 elif regexXAODTriggerMenuJsonAux.match(class_name) and _check_project() not in ['AthGeneration']:
577 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonAuxContainer_v1()
578 elif regexXAODCutBookkeeperContainer.match(class_name):
579 persistent_instances[name] = ROOT.xAOD.CutBookkeeperContainer_v1()
580 elif regexXAODCutBookkeeperContainerAux.match(class_name):
581 persistent_instances[name] = ROOT.xAOD.CutBookkeeperAuxContainer_v1()
582 elif regexXAODFileMetaData.match(class_name):
583 persistent_instances[name] = ROOT.xAOD.FileMetaData_v1()
584 elif regexXAODFileMetaDataAux.match(class_name):
585 persistent_instances[name] = ROOT.xAOD.FileMetaDataAuxInfo_v1()
586 elif regexXAODTruthMetaData.match(class_name):
587 persistent_instances[name] = ROOT.xAOD.TruthMetaDataContainer_v1()
588 elif regexXAODTruthMetaDataAux.match(class_name):
589 persistent_instances[name] = ROOT.xAOD.TruthMetaDataAuxContainer_v1()
590
591 if name in persistent_instances:
592 branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
593
594 # This creates a dict to store the dynamic attributes of the xAOD::FileMetaData
595 dynamicFMD = regexXAODFileMetaDataAuxDyn.match(name)
596 if dynamicFMD:
597 dynamicName = dynamicFMD.group().split('.')[-1]
598 dynamicType = regex_cppname.match(class_name)
599 if dynamicType:
600 # this should be a string
601 dynamic_fmd_items[dynamicName] = ROOT.std.string()
602 branch.SetAddress(ROOT.AddressOf(dynamic_fmd_items[dynamicName]))
603 else:
604 dynamic_fmd_items[dynamicName] = None
605
606
607 metadata_tree.GetEntry(0)
608
609 # This loads the dynamic attributes of the xAOD::FileMetaData from the TTree
610 for key in dynamic_fmd_items:
611 if dynamic_fmd_items[key] is None:
612 try:
613 if key.startswith("is"):
614 # this is probably a boolean
615 dynamic_fmd_items[key] = getattr(metadata_tree, key) != '\x00'
616 else:
617 # this should be a float
618 dynamic_fmd_items[key] = getattr(metadata_tree, key)
619 except AttributeError:
620 # should not happen, but just ignore missing attributes
621 pass
622 else:
623 # convert ROOT.std.string objects to python equivalent
624 dynamic_fmd_items[key] = str(dynamic_fmd_items[key])
625
626 # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
627 if meta_key_filter:
628 meta_dict[filename] = {}
629
630 # Find the RunNumber from EventStreamInfo, so only the relevant IOVs are read later
631 for key, obj in persistent_instances.items():
632 if key.startswith('EventStreamInfo'):
633 data = _convert_value(obj, None)
634 if data and 'runNumbers' in data and len(data['runNumbers']) == 1:
635 _gbl_run = data['runNumbers'][0]
636 msg.debug("Found RunNumber in EventStreamInfo: %d", _gbl_run)
637
638 # read the metadata
639 for name, content in persistent_instances.items():
640 key = name
641 if hasattr(content, 'm_folderName'):
642 key = content.m_folderName
643
644 # Some transition AODs contain both the Run2 and Run3 metadata formats. We only wish to read the Run3 format if such a file is encountered.
645 has_r3_trig_meta = ('TriggerMenuJson_HLT' in persistent_instances or 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT' in persistent_instances)
646 aux = None
647 if key.startswith('TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (AOD)
648 aux = persistent_instances[key+'Aux.']
649 elif key.startswith('DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (ESD)
650 menuPart = key.split('_')[-1]
651 aux = persistent_instances['xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_'+menuPart+'Aux.']
652 elif key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # AOD case (legacy support, HLT and L1 menus)
653 aux = persistent_instances['TriggerMenuAux.']
654 elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # ESD case (legacy support, HLT and L1 menus)
655 aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
656 elif (key == 'CutBookkeepers'
657 and 'CutBookkeepersAux.' in persistent_instances):
658 aux = persistent_instances['CutBookkeepersAux.']
659 elif key == 'CutBookkeepersAux.':
660 continue # Extracted using the interface object
661 elif (key == 'FileMetaData'
662 and 'FileMetaDataAux.' in persistent_instances):
663 aux = persistent_instances['FileMetaDataAux.']
664 elif (key == 'xAOD::FileMetaData_v1_FileMetaData'
665 and 'xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.' in persistent_instances):
666 aux = persistent_instances['xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.']
667 elif (key == 'TruthMetaData'
668 and 'TruthMetaDataAux.' in persistent_instances):
669 aux = persistent_instances['TruthMetaDataAux.']
670 elif key == 'TruthMetaDataAux.':
671 continue # Extracted using the interface object
672 elif 'Menu' in key and key.endswith('Aux.'):
673 continue # Extracted using the interface object
674
675 return_obj = _convert_value(content, aux)
676
677 if 'TriggerMenuJson' in key or ('TriggerMenu' in key and not has_r3_trig_meta):
678 if 'RAWTriggerMenuJson' in return_obj:
679 meta_dict[filename][key] = return_obj['RAWTriggerMenuJson']
680 del return_obj['RAWTriggerMenuJson']
681 if 'TriggerConfigInfo' not in meta_dict[filename]:
682 meta_dict[filename]['TriggerConfigInfo'] = {}
683 if 'dbkey' in return_obj:
684 meta_dict[filename]['TriggerConfigInfo'][key.split('_')[-1]] = {
685 'key' : return_obj['dbkey'],
686 'name': return_obj['name']
687 }
688 del return_obj['dbkey']
689 del return_obj['name']
690 if 'TriggerMenu' not in meta_dict[filename]:
691 meta_dict[filename]['TriggerMenu'] = {}
692 meta_dict[filename]['TriggerMenu'].update(return_obj)
693 elif "FileMetaData" in key:
694 if "FileMetaData" not in meta_dict[filename]:
695 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
696 meta_dict[filename]["FileMetaData"].update(return_obj)
697 else:
698 meta_dict[filename][key] = return_obj
699
700 try:
701 # get the number of events from EventStreamInfo
702 esi_dict = next(key for key, value in meta_dict[filename].items()
703 if isinstance(value, dict) and "numberOfEvents" in value and
704 meta_dict[filename]["metadata_items"][key] == "EventStreamInfo")
705 msg.debug(f"{esi_dict=}")
706 meta_dict[filename]["nentries"] = meta_dict[filename][esi_dict]["numberOfEvents"]
707 except StopIteration as err:
708 msg.debug(f"Caught {err=}, {type(err)=}, falling back on opening the DataHeader"
709 " Container to read the number of entries")
710 meta_dict[filename]['nentries'] = dataheader_nentries(current_file)
711 msg.debug(f"{meta_dict[filename]['nentries']=}")
712
713 if unique_tag_info_values and mode=='iov':
714 unique_tag_info_values = False
715 msg.info('disabling "unique_tag_info_values" option for "iov" mode')
716
717 # This is a required workaround which will temporarily be fixing ATEAM-560 originated from ATEAM-531
718 # ATEAM-560: https://its.cern.ch/jira/browse/ATEAM-560
719 # ATEAM-531: https://its.cern.ch/jira/browse/ATEAM-531
720 # This changes will remove all duplicates values presented in some files due
721 # to the improper merging of two IOVMetaDataContainers.
722 if unique_tag_info_values:
723 msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
724 'This is a workaround to remove all duplicate values from "/TagInfo" key')
725 if '/TagInfo' in meta_dict[filename]:
726 for key, value in meta_dict[filename]['/TagInfo'].items():
727 if isinstance(value, list) and value:
728 if len(unique_values := set(value)) > 1:
729 msg.warning(
730 f"Found multiple values for {key}: {value}. "
731 "Looking for possible duplicates."
732 )
733 maybe_ok = False
734 if key == "AMITag":
735 # curate duplicates like: ['s3681_q453', 's3681_q453_'] or ["s3681_q453", "q453_s3681"]
736 unique_amitags = set()
737 for amitags in unique_values:
738 unique_amitags.add(
739 "_".join([tag for tag in amitags.split("_") if tag])
740 )
741 # Remove tags that are parents - are contained in other tags
742 # Such that ["s3681","s3681_d1485"] keeps only the latter
743 parent_tags = []
744 for atag in unique_amitags:
745 if any(atag+'_' in x for x in unique_amitags if x != atag):
746 parent_tags += [atag]
747 for atag in parent_tags:
748 # Do not remove the last tag!
749 if len(unique_amitags)>1:
750 msg.warning(f"Removing parent AMI tag {atag}")
751 unique_amitags.remove(atag)
752 if len(unique_amitags) == 1:
753 maybe_ok = True
754 # Make sure we keep the one we want to keep
755 value.insert(0,list(unique_amitags)[0])
756 elif key == "beam_energy":
757 # handle duplicates like: ['6500000', '6500000.0'] or [3, "3"]
758 unique_energies = set()
759 for energy in unique_values:
760 try:
761 energy = int(energy)
762 except ValueError:
763 try:
764 energy = float(energy)
765 except ValueError:
766 pass
767 unique_energies.add(energy)
768 if len(unique_energies) == 1:
769 maybe_ok = True
770 elif key in ["AtlasRelease", "IOVDbGlobalTag", "AODFixVersion"]:
771 maybe_ok = True
772 if maybe_ok:
773 msg.warning(
774 f"Multiple values for {key} may mean the same, or "
775 "the input file was produced in multi-step job. "
776 f"Ignoring all but the first entry: {key} = {value[0]}"
777 )
778 else:
779 raise ValueError(
780 f"{key} from /TagInfo contains more than 1 unique value: {value}"
781 )
782
783 meta_dict[filename]['/TagInfo'][key] = value[0]
784
785 if promote is None:
786 promote = mode == 'lite' or mode == 'peeker'
787
788 # Filter the data and create a prettier output for the 'lite' mode
789 if mode == 'lite':
790 meta_dict = make_lite(meta_dict)
791
792 if mode == 'peeker':
793 meta_dict = make_peeker(meta_dict)
794
795 if promote:
796 meta_dict = promote_keys(meta_dict, mode)
797
798 # If AnalysisBase the itemList must be grabbed another way
799 if not isGaudiEnv():
800 if isinstance(collectionTree, ROOT.TTree):
801 meta_dict[filename]['itemList'] = [ (b.GetClassName(), b.GetName()) for b in collectionTree.GetListOfBranches() ]
802
803 # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
804 elif current_file_type == 'BS':
805
806 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and not os.path.isfile(filename):
807 msg.warning('Ignoring not accessible file: {}'.format(filename))
808 continue
809
810 import eformat
811
812 # store the number of entries
813 bs = eformat.istream(filename)
814 meta_dict[filename]['nentries'] = bs.total_events
815
816 # store the 'guid' value
817 data_reader = eformat.EventStorage.pickDataReader(filename)
818 assert data_reader, 'problem picking a data reader for file [%s]' % filename
819
820 # set auto flush equivalent, which for BS is always 1
821 meta_dict[filename]['auto_flush'] = 1
822
823 if hasattr(data_reader, 'GUID'):
824 meta_dict[filename]['file_guid'] = data_reader.GUID()
825
826 # compression level and algorithm, for BS always ZLIB
827 meta_dict[filename]['file_comp_alg'] = 1
828 meta_dict[filename]['file_comp_level'] = 1
829
830
831 # if the flag full is set to true then grab all metadata
832 # ------------------------------------------------------------------------------------------------------#
833 if mode != "tiny":
834 bs_metadata = {}
835
836 for md in data_reader.freeMetaDataStrings():
837 if md.startswith('Event type:'):
838 k = 'eventTypes'
839 v = []
840 if 'is sim' in md:
841 v.append('IS_SIMULATION')
842 else:
843 v.append('IS_DATA')
844
845 if 'is atlas' in md:
846 v.append('IS_ATLAS')
847 else:
848 v.append('IS_TESTBEAM')
849
850 if 'is physics' in md:
851 v.append('IS_PHYSICS')
852 else:
853 v.append('IS_CALIBRATION')
854
855 bs_metadata[k] = tuple(v)
856
857 elif md.startswith('GeoAtlas:'):
858 k = 'geometry'
859 v = md.split('GeoAtlas:')[1].strip()
860 bs_metadata[k] = v
861
862 elif md.startswith('IOVDbGlobalTag:'):
863 k = 'conditions_tag'
864 v = md.split('IOVDbGlobalTag:')[1].strip()
865 bs_metadata[k] = v
866
867 elif '=' in md:
868 k, v = md.split('=', 1) # Split on first '=' only
869 bs_metadata[k] = v
870
871 bs_metadata['detectorMask'] = data_reader.detectorMask()
872 bs_metadata['runNumbers'] = data_reader.runNumber()
873 bs_metadata['lumiBlockNumbers'] = data_reader.lumiblockNumber()
874 bs_metadata['projectTag'] = data_reader.projectTag()
875 bs_metadata['stream'] = data_reader.stream()
876 #bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
877 beamTypeNbr= data_reader.beamType()
878 #According to info from Rainer and Guiseppe the beam type is
879 #O: no beam
880 #1: protons
881 #2: ions
882 if (beamTypeNbr==0): bs_metadata['beamType'] = 'cosmics'
883 elif (beamTypeNbr==1 or beamTypeNbr==2): bs_metadata['beamType'] = 'collisions'
884 else: bs_metadata['beamType'] = 'unknown'
885
886 bs_metadata['beamEnergy'] = data_reader.beamEnergy()
887
888 meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
889 meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
890 meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
891 meta_dict[filename]['project_name'] = bs_metadata.get('projectTag', None)
892
893 # Promote up one level
894 meta_dict[filename]['detectorMask'] = [bs_metadata.get('detectorMask', None)]
895 meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
896 meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
897 meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
898 meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
899 meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
900
901 if not data_reader.good():
902 # event-less file...
903 meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
904 meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
905
906 msg.debug(f"{meta_dict[filename]=}")
907 msg.debug(f"{len(bs)=}")
908 if len(bs):
909 evt = bs[0]
910 try:
911 evt.check()
912 meta_dict[filename]['processingTags'] = [tag.name for tag in evt.stream_tag()]
913 meta_dict[filename]['evt_number'] = [evt.global_id()]
914 meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
915 # ATLASRECTS-7126: If there is no valid lumiblock information
916 # in the ByteStream header, get the info from the first event.
917 if meta_dict[filename]['lumiBlockNumbers'] == [0]:
918 msg.debug('Taking the luminosity block info from the first event (%i)', evt.lumi_block())
919 meta_dict[filename]['lumiBlockNumbers'] = [evt.lumi_block()]
920 # ATLASRECTS-7126: If there is no valid run number information
921 # in the ByteStream header, get the info from the first event.
922 if meta_dict[filename]['runNumbers'] == [0]:
923 msg.debug('Taking the run number info from the first event (%i)', evt.run_no())
924 meta_dict[filename]['runNumbers'] = [evt.run_no()]
925 except RuntimeError as err:
926 msg.error("Issue while reading the first event of BS file %r: %r", filename, err)
927 else:
928 msg.debug(f"{meta_dict[filename]=}")
929 else:
930 msg.warning(f"Event-less BS {filename=}, will not read metadata information from the first event")
931
932 # fix for ATEAM-122
933 if len(bs_metadata.get('eventTypes', '')) == 0: # see: ATMETADATA-6
934 evt_type = ['IS_DATA', 'IS_ATLAS']
935 if bs_metadata.get('stream', '').startswith('physics_'):
936 evt_type.append('IS_PHYSICS')
937 elif bs_metadata.get('stream', '').startswith('calibration_'):
938 evt_type.append('IS_CALIBRATION')
939 elif bs_metadata.get('projectTag', '').endswith('_calib'):
940 evt_type.append('IS_CALIBRATION')
941 else:
942 evt_type.append('Unknown')
943
944 meta_dict[filename]['eventTypes'] = evt_type
945
946 if mode == 'full':
947 meta_dict[filename]['bs_metadata'] = bs_metadata
948
949 # ------ Throw an error if the user provide other file types -------------------------------------------------#
950 else:
951 msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
952 return None
953
954 return meta_dict
955
956
958 import os
959 if 'AthSimulation_DIR' in os.environ:
960 return 'AthSimulation'
961 if 'AthGeneration_DIR' in os.environ:
962 return 'AthGeneration'
963 return 'Athena'
964
965
966def _get_pfn(filename):
967 """
968 Extract the actual filename if LFN or PFN notation is used
969 """
970 pfx = filename[0:4]
971 if pfx == 'PFN:':
972 return filename[4:]
973 if pfx == 'LFN:':
974 import subprocess, os
975 os.environ['POOL_OUTMSG_LEVEL'] = 'Error'
976 output = subprocess.check_output(['FClistPFN','-l',filename[4:]],text=True).split('\n')
977 if len(output) == 2:
978 return output[0]
979 msg.error( 'FClistPFN({0}) returned unexpected number of lines:'.format(filename) )
980 msg.error( '\n'.join(output) )
981 return filename
982
983
def _read_guid(filename):
    """
    Extract the "guid" (Globally Unique Identifier, as used in POOL files and Grid catalogs) from a POOL file.

    The value is taken from the '##Params' object of the file, which may be a
    TTree, an RNTuple or a TDirectory. Its strings have the form
    "[NAME=<name>][VALUE=<value>]"; the value of the last string whose name is
    'FID' is returned.

    :param filename: the input file ("PFN:" / "LFN:" notation is resolved first)
    :return: the guid value, None if unavailable
    :raises NotImplementedError: if '##Params' is of any other type
    """
    import ROOT
    try:
        from ROOT import RNTuple as rnt
    except ImportError:
        from ROOT.Experimental import RNTuple as rnt

    root_file = ROOT.TFile.Open( _get_pfn(filename) )
    if not root_file:
        # a file that could not be opened has no '##Params' to look at
        msg.warning('Cannot open file %s to read its GUID', filename)
        return None

    params = root_file.Get('##Params')
    if not params:
        return None
    if not isinstance(params, (ROOT.TTree, rnt, ROOT.TDirectory)):
        raise NotImplementedError(f"Cannot extract GUID from object {params!r} of type {type(params)!r}")

    regex = re.compile(r'\[NAME=(\w+)\]\[VALUE=(.*)\]', re.ASCII)

    def latest_fid(db_string, current):
        # Return the FID carried by db_string, or 'current' if it carries none.
        # Callers keep scanning: it's the last FID entry that counts.
        found = regex.match(db_string)
        if found and found.group(1) == 'FID':
            return found.group(2)
        return current

    fid = None

    if isinstance(params, ROOT.TTree):
        for entry in params:
            fid = latest_fid(entry.GetLeaf('db_string').GetValueString(), fid)
    elif isinstance(params, rnt):
        try:
            from ROOT import RNTupleReader
        except ImportError:
            from ROOT.Experimental import RNTupleReader
        reader = RNTupleReader.Open(params)
        try:
            entry = reader.CreateEntry()
        except AttributeError:
            entry = reader.GetModel().CreateEntry()
        for idx in range(reader.GetNEntries()):
            reader.LoadEntry(idx, entry)
            try:
                fid = latest_fid(str(entry['db_string']), fid)
            except (AttributeError, TypeError) as err:
                # Early RNTuple implementation doesn't allow reading
                # strings on the python side, might be triggering it...
                msg.error(f"Cannot read FID from ##Params in RNTuple w/ ROOT error: {err}")
                return None
    else:
        # only a TDirectory is left after the type check above
        for key in params.GetListOfKeys():
            fid = latest_fid(str(params.Get(key.GetName())), fid)

    return fid
1043
1044
1046 result = {}
1047
1048 for meth in dir(obj):
1049 if not meth.startswith('_'):
1050 if meth.startswith('m_'):
1051
1052 field_name = str(meth)[2:]
1053 field_value = getattr(obj, meth)
1054
1055 result[field_name] = _convert_value(field_value)
1056
1057 return result
1058
1059
1060def _convert_value(value, aux = None):
1061 cl=value.__class__
1062 if hasattr(cl, '__cpp_name__'):
1063 result = regex_cppname.match(cl.__cpp_name__)
1064 if result:
1065 cpp_type = result.group(1)
1066 if cpp_type == 'vector' or cpp_type == 'std::vector':
1067 return [_convert_value(val) for val in value]
1068 elif cpp_type == 'set' or cpp_type == 'std::set':
1069 return {_convert_value(val) for val in value}
1070 elif cpp_type == 'pair' or cpp_type == 'std::pair':
1071 return _convert_value(value.first), _convert_value(value.second)
1072
1073 # elif cpp_type == 'long':
1074 # return int(value)
1075
1076 elif cpp_type == 'string' or cpp_type == 'std::string':
1077 return str(value)
1078
1079 elif cl.__cpp_name__ == "_Bit_reference":
1080 return bool(value)
1081
1082 # special case which extracts data in a better format from IOVPayloadContainer_p1 class
1083 elif cl.__cpp_name__ == 'IOVMetaDataContainer_p1':
1084 return _extract_fields_iovmdc(value)
1085
1086 elif cl.__cpp_name__ == 'IOVPayloadContainer_p1':
1087 if _gbl_mode == 'iov':
1088 return _extract_iov_detailed(value)
1089 elif _gbl_run is not None:
1090 r = _extract_iov_for_run(value, _gbl_run)
1091 if r is not None:
1092 return r
1093 # fall through to the default extraction if no IOV found for the run
1094 return _extract_fields_iov( value, range(value.m_attrIndexes.size()) )
1095
1096 elif cl.__cpp_name__ == 'xAOD::EventFormat_v1':
1097 return _extract_fields_ef(value)
1098 elif cl.__cpp_name__ == 'xAOD::CutBookkeeperContainer_v1':
1099 return _extract_fields_cbk(interface=value, aux=aux)
1100 elif cl.__cpp_name__ == 'xAOD::FileMetaData_v1':
1101 return _extract_fields_fmd(interface=value, aux=aux)
1102 elif cl.__cpp_name__ == 'DataVector<xAOD::TruthMetaData_v1>':
1103 return _extract_fields_tmd(interface=value, aux=aux)
1104
1105 elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenu_v1>' :
1106 return _extract_fields_triggermenu(interface=value, aux=aux)
1107
1108 elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenuJson_v1>' :
1109 return _extract_fields_triggermenujson(interface=value, aux=aux)
1110
1111 elif (cl.__cpp_name__ == 'EventStreamInfo_p1' or
1112 cl.__cpp_name__ == 'EventStreamInfo_p2' or
1113 cl.__cpp_name__ == 'EventStreamInfo_p3'):
1114 return _extract_fields_esi(value)
1115
1116 elif (cl.__cpp_name__ == 'EventType_p1' or
1117 cl.__cpp_name__ == 'EventType_p3'):
1118 fields = _extract_fields(value)
1119 fields = _convert_event_type_bitmask(fields)
1120 fields = _convert_event_type_user_type(fields)
1121 return fields
1122
1123 elif regex_persistent_class.match(cl.__cpp_name__):
1124 return _extract_fields(value)
1125
1126 return value
1127
1128
1129def _get_attribute_val(iov_container, attr_name, attr_idx):
1130 type_idx = attr_idx.typeIndex()
1131 obj_idx = attr_idx.objIndex()
1132
1133 attr_value = None
1134
1135 if type_idx == 0:
1136 attr_value = bool(iov_container.m_bool[obj_idx])
1137 elif type_idx == 1:
1138 attr_value = int(iov_container.m_char[obj_idx])
1139 elif type_idx == 2:
1140 attr_value = int(iov_container.m_unsignedChar[obj_idx])
1141 elif type_idx == 3:
1142 attr_value = int(iov_container.m_short[obj_idx])
1143 elif type_idx == 4:
1144 attr_value = int(iov_container.m_unsignedShort[obj_idx])
1145 elif type_idx == 5:
1146 attr_value = int(iov_container.m_int[obj_idx])
1147 elif type_idx == 6:
1148 attr_value = int(iov_container.m_unsignedInt[obj_idx])
1149 elif type_idx == 7:
1150 attr_value = int(iov_container.m_long[obj_idx])
1151 elif type_idx == 8:
1152 attr_value = int(iov_container.m_unsignedLong[obj_idx])
1153 elif type_idx == 9:
1154 attr_value = int(iov_container.m_longLong[obj_idx])
1155 elif type_idx == 10:
1156 attr_value = int(iov_container.m_unsignedLongLong[obj_idx])
1157 elif type_idx == 11:
1158 attr_value = float(iov_container.m_float[obj_idx])
1159 elif type_idx == 12:
1160 attr_value = float(iov_container.m_double[obj_idx])
1161 elif type_idx == 13:
1162 # skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
1163 pass
1164 elif type_idx == 14:
1165 attr_value = str(iov_container.m_string[obj_idx])
1166 # Cleaning class name from value
1167 if attr_value.startswith('IOVMetaDataContainer_p1_'):
1168 attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
1169 if attr_value.startswith('_'):
1170 attr_value = attr_value.replace('_', '/')
1171 # Now it is clean
1172 elif type_idx == 15:
1173 attr_value = int(iov_container.m_date[obj_idx])
1174 elif type_idx == 16:
1175 attr_value = int(iov_container.m_timeStamp[obj_idx])
1176 else:
1177 raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
1178
1179 return attr_value
1180
1181
1182def _extract_fields_iov( iov_container, idx_range ):
1183 result = {}
1184 for idx in idx_range:
1185 attr_idx = iov_container.m_attrIndexes[idx]
1186 name_idx = attr_idx.nameIndex()
1187 attr_name = iov_container.m_attrName[name_idx]
1188 attr_value = _get_attribute_val(iov_container, attr_name, attr_idx)
1189
1190 if attr_name not in result:
1191 result[attr_name] = [attr_value]
1192 else:
1193 result[attr_name].append(attr_value)
1194
1195 max_element_count = 0
1196 for content in result.values():
1197 if len(content) > max_element_count:
1198 max_element_count = len(content)
1199
1200 if max_element_count <= 1:
1201 for name, content in result.items():
1202 if len(content) > 0:
1203 result[name] = content[0]
1204 else:
1205 result[name] = None
1206
1207 return result
1208
1209
1210def _extract_iov_for_run(iov_container, run):
1211 def extract_payload(iov_container, payload):
1212 result = {}
1213 for attrList in payload.m_attrLists:
1214 r = _extract_fields_iov( iov_container, range(attrList.m_firstIndex, attrList.m_lastIndex) )
1215 for name, content in r.items():
1216 if name not in result:
1217 result[name] = [content]
1218 elif content not in result[name]:
1219 result[name].append(content)
1220 # turn single element lists into single values
1221 for name, content in result.items():
1222 if len(content) == 1:
1223 result[name] = content[0]
1224 elif len(content) == 0:
1225 # to be consistent with the old code
1226 result[name] = None
1227 return result
1228
1229 for payload in iov_container.m_payloadVec:
1230 if run >= payload.m_start>>32 and run < payload.m_stop>>32:
1231 return extract_payload(iov_container, payload)
1232 # IOV for the requested run not found
1233 if len(iov_container.m_payloadVec) > 0:
1234 # print a warning only if there was actually some payload in the container
1235 msg.warning('No IOV found for run %d', run)
1236 return None
1237
1238
1239def _extract_iov_detailed(iov_container):
1240 def iovtostr(t):
1241 # break iov time into high and low halves (run number usually in the higher half)
1242 return "({h}:{l})".format(h=t>>32, l=t&(2**32-1))
1243
1244 def extract_list_collection(iov_container, listCollection ):
1245 result = {}
1246 ln = 0
1247 for list in listCollection.m_attrLists:
1248 ln = ln + 1
1249 lname = 'List {ln}: iov=[{s} ,{e}]; Channel#={ch}'.format(
1250 ln=ln, s=iovtostr(list.m_range.m_start),
1251 e=iovtostr(list.m_range.m_stop),
1252 ch=list.m_channelNumber )
1253 result[ lname ] = _extract_fields_iov( iov_container, range(list.m_firstIndex, list.m_lastIndex) )
1254 return result
1255
1256 result = {}
1257 pn = 0
1258 for listCollection in iov_container.m_payloadVec:
1259 pn = pn + 1
1260 pname = 'IOV range {n}: [{s}, {e}]'.format(n=pn, s=iovtostr(listCollection.m_start),
1261 e=iovtostr(listCollection.m_stop))
1262 result[ pname ] = extract_list_collection(iov_container, listCollection )
1263 return result
1264
1265
1267 return _convert_value(value.m_payload)
1268
1269
1271 result = {}
1272
1273 result['eventTypes'] = []
1274 for eventType in value.m_eventTypes:
1275 result['eventTypes'].append(_convert_value(eventType))
1276
1277 result['numberOfEvents'] = value.m_numberOfEvents
1278 result['runNumbers'] = list(value.m_runNumbers)
1279 result['lumiBlockNumbers'] = list(value.m_lumiBlockNumbers)
1280 result['processingTags'] = [str(v) for v in value.m_processingTags]
1281 result['itemList'] = []
1282
1283 # Get the class name in the repository with CLID <clid>
1284 from CLIDComps.clidGenerator import clidGenerator
1285 cgen = clidGenerator("")
1286 for clid, sgkey in value.m_itemList:
1287 if isinstance(sgkey, bytes):
1288 sgkey = sgkey.decode()
1289 else:
1290 sgkey = str(sgkey)
1291 result['itemList'].append((cgen.getNameFromClid(clid), sgkey))
1292
1293 return result
1294
1295
1297 result = {}
1298
1299 for ef_element in value:
1300 result[ef_element.first] = ef_element.second.className()
1301
1302 return result
1303
1304
1305def _extract_fields_cbk(interface=None, aux=None):
1306 """Extract CutBookkeeper content into dictionary
1307
1308 This function takes the CutBookkeeperContainer_v1 and CutBookkeeperAuxContainer_v1 objects.
1309 It makes sure the the interface object uses the auxiliary object as store.
1310 Args:
1311 interface (CutBookkeeperContainer_v1): the interface class
1312 aux (CutBookkeeperAuxContainer_v1): auxiliary container object
1313 Returns
1314 dict: with the cycle number and last stream
1315 """
1316 if not interface or not aux:
1317 return {}
1318 interface.setStore(aux)
1319
1320 max_cycle = -1
1321 input_stream = ''
1322
1323 for cbk in interface:
1324 current_cycle = int(cbk.cycle())
1325 if current_cycle > max_cycle:
1326 max_cycle = current_cycle
1327 input_stream = str(cbk.inputStream())
1328
1329 result = {
1330 'currentCutCycle': max_cycle,
1331 'currentCutInputStream': input_stream,
1332 }
1333 return result
1334
1335
def _extract_fields_fmd(interface=None, aux=None):
    """Turn static FileMetaData content into dictionary

    The auxiliary object is attached to the interface object as its store. The
    interface's value() method is then called for four variables:
    productionRelease and dataType (strings), runNumbers and lumiBlocks
    (vectors of unsigned int), and the results are converted to python objects.
    Args:
        interface (FileMetaData_v1): the interface class
        aux (FileMetaDataAuxInfo_v1): auxiliary container object
    Returns
        dict: productionRelease and dataType as str, runNumbers and lumiBlocks as
        list; empty if either argument is missing
    """
    import ROOT
    if not (interface and aux):
        return {}
    interface.setStore(aux)

    string_type = ROOT.std.string
    uint_vector_type = ROOT.std.vector('unsigned int')
    holders = {
        "productionRelease": string_type(),
        "dataType": string_type(),
        "runNumbers": uint_vector_type(),
        "lumiBlocks": uint_vector_type(),
    }

    # Note: using this for dynamic attributes returns empty content
    for name, holder in holders.items():
        try:
            interface.value(getattr(interface, name), holder)
        except AttributeError:
            # fall back to passing the plain variable name
            interface.value(name, holder)

    # Now return python objects: strings first, then the vectors
    result = {}
    for name, holder in holders.items():
        if type(holder) is string_type:
            result[name] = str(holder)
    for name, holder in holders.items():
        if type(holder) is uint_vector_type:
            result[name] = list(holder)
    return result
1369
1370
1371def _extract_fields_tmd(interface=None, aux=None):
1372 import ROOT
1373 BadAuxVarException = ROOT.SG.ExcBadAuxVar
1374 """Extract TruthMetaData content into dictionary
1375
1376 This function takes the TruthMetaDataContainer_v1 and TruthMetaDataAuxContainer_v1 objects.
1377 It makes sure the the interface object uses the auxiliary object as store.
1378 Args:
1379 interface (TruthMetaDataContainer_v1): the interface class
1380 aux (TruthMetaDataAuxContainer_v1): auxiliary container object
1381 Returns
1382 dict
1383 """
1384 if not interface or not aux:
1385 return {}
1386 interface.setStore(aux)
1387
1388 # return the first as we do not really expect more than one
1389 result = {}
1390 for tmd in interface:
1391 result['mcChannelNumber'] = tmd.mcChannelNumber()
1392
1393 try:
1394 result['weightNames'] = [str(v) for v in tmd.weightNames()]
1395 except BadAuxVarException:
1396 result['weightNames'] = []
1397
1398 try:
1399 result['lhefGenerator'] = str(tmd.lhefGenerator())
1400 except BadAuxVarException:
1401 result['lhefGenerator'] = ''
1402
1403 try:
1404 result['generators'] = str(tmd.generators())
1405 except BadAuxVarException:
1406 result['generators'] = ''
1407
1408 try:
1409 result['evgenProcess'] = str(tmd.evgenProcess())
1410 except BadAuxVarException:
1411 result['evgenProcess'] = ''
1412
1413 try:
1414 result['evgenTune'] = str(tmd.evgenTune())
1415 except BadAuxVarException:
1416 result['evgenTune'] = ''
1417
1418 try:
1419 result['hardPDF'] = str(tmd.hardPDF())
1420 except BadAuxVarException:
1421 result['hardPDF'] = ''
1422
1423 try:
1424 result['softPDF'] = str(tmd.softPDF())
1425 except BadAuxVarException:
1426 result['softPDF'] = ''
1427
1428 return result
1429
1430
1431""" Note: Deprecated. Legacy support for Run 2 AODs produced in release 21 or in release 22 prior to April 2021
1432"""
1434 if aux is None:
1435 return {}
1436
1437 L1Items = []
1438 HLTChains = []
1439
1440 try:
1441 interface.setStore( aux )
1442 if interface.size() > 0:
1443 # We make the assumption that the first stored SMK is
1444 # representative of all events in the input collection.
1445 firstMenu = interface.at(0)
1446 L1Items = [ _convert_value(item) for item in firstMenu.itemNames() ]
1447 HLTChains = [ _convert_value(chain) for chain in firstMenu.chainNames() ]
1448 except Exception as err: # noqa: F841
1449 msg.warning('Problem reading xAOD::TriggerMenu:')
1450
1451 result = {}
1452 result['L1Items'] = L1Items
1453 result['HLTChains'] = HLTChains
1454
1455 return result
1456
1458 result = {}
1459
1460 try:
1461 interface.setStore( aux )
1462 if interface.size() > 0:
1463 # We make the assumption that the first stored SMK is
1464 # representative of all events in the input collection.
1465 firstMenu = interface.at(0)
1466 import json
1467 decoded = json.loads(firstMenu.payload())
1468 result['RAWTriggerMenuJson'] = firstMenu.payload()
1469 result['name'] = firstMenu.name()
1470 result['dbkey'] = firstMenu.key()
1471 if decoded['filetype'] == 'hltmenu':
1472 result['HLTChains'] = [ _convert_value(chain) for chain in decoded['chains'] ]
1473 elif decoded['filetype'] == 'l1menu':
1474 result['L1Items'] = [ _convert_value(item) for item in decoded['items'] ]
1475 elif decoded['filetype'] in ['bunchgroupset', 'hltprescale', 'l1prescale', 'hltmonitoringsummary']:
1476 return result
1477
1478 else:
1479 msg.warning('Got an xAOD::TriggerMenuJson called {0} but only expecting hltmenu or l1menu'.format(decoded['filetype']))
1480 return {}
1481
1482 except Exception as err: # noqa: F841
1483 msg.warning('Problem reading xAOD::TriggerMenuJson')
1484
1485 return result
1486
1488 if 'user_type' in value:
1489 items = value['user_type'].split('#')[3:]
1490 for i in range(0, len(items), 2):
1491 value[items[i]] = _convert_value(items[i+1])
1492 return value
1493
1495
1496 types = None
1497 for key in value:
1498 if key == 'bit_mask':
1499 val = value[key]
1500
1501 bitmask_length = len(val)
1502
1503 is_simulation = False
1504 is_testbeam = False
1505 is_calibration = False
1506
1507 if bitmask_length > 0: # ROOT.EventType.IS_SIMULATION
1508 is_simulation = val[0]
1509
1510 if bitmask_length > 1: # ROOT.EventType.IS_TESTBEAM
1511 is_testbeam = val[1]
1512
1513 if bitmask_length > 2: # ROOT.EventType.IS_CALIBRATION:
1514 is_calibration = val[2]
1515
1516 types = [
1517 'IS_SIMULATION' if is_simulation else 'IS_DATA',
1518 'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
1519 'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
1520 ]
1521
1522 value['type'] = types
1523 return value
1524
1525
def make_lite(meta_dict):
    """
    Reduce the metadata of every file, in place, to the 'lite' selection.

    Entries listed in 'metadata_items' with an EventStreamInfo type keep only
    the keys in lite_primary_keys_to_keep, and '/TagInfo' keeps only the keys in
    lite_TagInfo_keys_to_keep.

    :param meta_dict: metadata keyed by file name
    :return: the same (modified) dictionary
    """
    for filename, file_content in meta_dict.items():
        item_types = file_content['metadata_items'] if file_content else None
        for key in file_content:
            if key in item_types and regexEventStreamInfo.match(item_types[key]):
                unwanted = [item for item in file_content[key]
                            if item not in lite_primary_keys_to_keep]
                for item in unwanted:
                    file_content[key].pop(item)

        if '/TagInfo' in file_content:
            tag_info = file_content['/TagInfo']
            unwanted = [item for item in tag_info
                        if item not in lite_TagInfo_keys_to_keep]
            for item in unwanted:
                tag_info.pop(item)
    return meta_dict
1541
1542
def _drop_unlisted_keys(content, keys_to_keep):
    """Remove, in place, every key of ``content`` that is not in ``keys_to_keep``."""
    for item in list(content):
        if item not in keys_to_keep:
            content.pop(item)


def make_peeker(meta_dict):
    """
    Reduce the metadata of every file, in place, to the 'peeker' selection.

    For each section handled here only a fixed list of keys is kept: entries
    listed in 'metadata_items' with an EventStreamInfo type, '/TagInfo',
    '/Simulation/Parameters', '/Digitization/Parameters', 'CutBookkeepers' and
    'TruthMetaData'. Other sections are left untouched.

    :param meta_dict: metadata keyed by file name
    :return: the same (modified) dictionary
    """
    event_stream_info_keys = (
        'lumiBlockNumbers',
        'runNumbers',
        'mc_event_number',
        'mc_channel_number',
        'eventTypes',
        'processingTags',
        'itemList',
    )

    # section name -> keys kept in that section
    section_keys = {
        '/TagInfo': (
            'beam_energy',
            'beam_type',
            'GeoAtlas',
            'IOVDbGlobalTag',
            'AODFixVersion',
            'AMITag',
            'project_name',
            'triggerStreamOfFile',
            'AtlasRelease',
            'specialConfiguration',
            'mc_campaign',
            'hepmc_version',
            'generators',
            'keywords',
            'data_year',
        ),
        '/Simulation/Parameters': (
            'G4Version',
            'TruthStrategy',
            'SimBarcodeOffset',
            'RegenerationIncrement',
            'TRTRangeCut',
            'SimulationFlavour',
            'Simulator',
            'PhysicsList',
            'SimulatedDetectors',
            'IsDataOverlay',
        ),
        '/Digitization/Parameters': (
            'numberOfCollisions',
            'intraTrainBunchSpacing',
            # the comma after the next entry matters: without it the two
            # adjacent literals are joined into one key matching neither
            'BeamIntensityPattern',
            'physicsList',
            'digiSteeringConf',
            'pileUp',
            'DigitizedDetectors',
        ),
        'CutBookkeepers': (
            'currentCutCycle',
            'currentCutInputStream',
        ),
        'TruthMetaData': (
            'mcChannelNumber',
            'weightNames',
        ),
    }

    for filename, file_content in meta_dict.items():
        for key in file_content:
            item_types = file_content['metadata_items']
            if key in item_types and regexEventStreamInfo.match(item_types[key]):
                _drop_unlisted_keys(file_content[key], event_stream_info_keys)

        for section, keys_to_keep in section_keys.items():
            if section in file_content:
                _drop_unlisted_keys(file_content[section], keys_to_keep)

    return meta_dict
1632
1633
def promote_keys(meta_dict, mode):
    """
    Move selected metadata from nested sections up to the per-file top level.

    For every file, in place:
      * the first entry listed in 'metadata_items' with an EventStreamInfo type
        is merged into the top level and removed;
      * outside a Gaudi environment, the first entry listed in 'metadata_items'
        whose key contains 'FileMetaData' is used to fill top-level values
        (it is removed only in 'lite' mode);
      * '/TagInfo', '/Generation/Parameters', '/Simulation/Parameters',
        '/Digitization/Parameters' and 'CutBookkeepers' are merged into the top
        level and removed.

    :param meta_dict: metadata keyed by file name
    :param mode: the reading mode; 'peeker' and 'lite' get special treatment
    :return: the same (modified) dictionary
    """
    for filename, file_content in meta_dict.items():
        md = meta_dict[filename]
        for key in file_content:
            if key in md['metadata_items'] and regexEventStreamInfo.match(md['metadata_items'][key]):
                md.update(md[key])

                if 'eventTypes' in md and len(md['eventTypes']):
                    et = md['eventTypes'][0]
                    md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
                    if 'mc_channel_number' in et:
                        md['mc_channel_number'] = et.get('mc_channel_number', None)
                    md['eventTypes'] = et['type']

                    # For very old files
                    if 'GeoAtlas' in et:
                        md['GeoAtlas'] = et.get('GeoAtlas', None)
                    if 'IOVDbGlobalTag' in et:
                        md['IOVDbGlobalTag'] = et.get('IOVDbGlobalTag', None)

                if 'lumiBlockNumbers' in md[key]:
                    md['lumiBlockNumbers'] = md[key]['lumiBlockNumbers']

                if 'processingTags' in md[key]:
                    md['processingTags'] = md[key]['processingTags']

                # md was modified while being iterated: leave the loop right away
                meta_dict[filename].pop(key)
                break

            if not isGaudiEnv() and key in md['metadata_items'] and 'FileMetaData' in key:
                fmd = md[key]

                if 'beamType' in fmd:
                    md['beam_type'] = fmd['beamType']

                if 'runNumbers' in fmd:
                    md['runNumbers'] = fmd['runNumbers']

                if 'mcProcID' in fmd:
                    md['mc_channel_number'] = int(fmd['mcProcID'])

                if 'mcCampaign' in fmd:
                    md['mc_campaign'] = fmd['mcCampaign']

                if 'dataYear' in fmd:
                    md['data_year'] = int(fmd['dataYear'])

                if 'lumiBlocks' in fmd:
                    md['lumiBlockNumbers'] = fmd['lumiBlocks']

                if mode == 'peeker' and 'amiTag' in fmd:
                    md['AMITag'] = fmd['amiTag']

                if 'beamEnergy' in fmd:
                    md['beam_energy'] = int(fmd['beamEnergy'])

                if 'geometryVersion' in fmd:
                    md['GeoAtlas'] = fmd['geometryVersion']

                # EventType checks
                md['eventTypes'] = []
                if mode == 'peeker' and 'simFlavour' in fmd:
                    md['SimulationFlavour'] = fmd['simFlavour']

                if mode == 'peeker' and 'isDataOverlay' in fmd:
                    md['IsDataOverlay'] = fmd['isDataOverlay']

                if 'dataType' in fmd:
                    md['processingTags'] = [fmd['dataType']]

                # a missing 'dataType' is treated as "not a DAOD_TRUTH file"
                # instead of raising KeyError in the checks below
                is_truth_daod = 'dataType' in fmd and 'DAOD_TRUTH' in fmd['dataType']

                if (
                    ('simFlavour' in fmd and ('FullG4' in fmd['simFlavour'] or 'ATLFAST' in fmd['simFlavour']))
                    or is_truth_daod
                ):
                    md['eventTypes'].append('IS_SIMULATION')
                else:
                    md['eventTypes'].append('IS_DATA')

                if ('GeoAtlas' in md and 'ATLAS' in md['GeoAtlas']) or is_truth_daod:
                    md['eventTypes'].append('IS_ATLAS')
                    # this is probably safe to assume for all files used in AnalysisBase
                    md['eventTypes'].append('IS_PHYSICS')
                else:
                    md['eventTypes'].append('IS_TESTBEAM')

                if mode == 'peeker':
                    if 'productionRelease' in fmd:
                        md['AtlasRelease'] = fmd['productionRelease']

                    if 'generatorsInfo' in fmd:
                        md['generators'] = fmd['generatorsInfo']

                if mode == 'lite':
                    meta_dict[filename].pop(key)
                # keys were added to md above, so the iteration over it must not
                # be resumed
                break

        # sections whose content is merged into the top level and then dropped
        for section in (
            '/TagInfo',
            '/Generation/Parameters',
            '/Simulation/Parameters',
            '/Digitization/Parameters',
            'CutBookkeepers',
        ):
            if section in file_content:
                md.update(md[section])
                md.pop(section)

    return meta_dict
1752
1753
def convert_itemList(metadata, layout=None):
    """
    Rearrange the itemList values to match the format of 'eventdata_items',
    'eventdata_itemsList' or 'eventdata_itemsDic' generated with the legacy
    file peeker tool.

    The item list is taken from metadata['itemList'] when that key exists.
    Otherwise it is taken from the first entry of metadata whose type, as
    recorded in metadata['metadata_items'], is 'EventStreamInfo_p3'.

    :param metadata: a dictionary obtained using read_metadata method.
                     The mode for read_metadata must be 'peeker' or 'full'
    :param layout: the mode in which the data will be converted:
                * for 'eventdata_items' use: layout= None
                * for 'eventdata_itemsList' use: layout= '#join'
                * for 'eventdata_itemsDic' use: layout= 'dict'
    :return: the item list in the requested layout, or None when no item list
             is available or the layout is not one of the values above
    """

    # Find the itemsList:
    item_list = None

    if 'itemList' in metadata:
        item_list = metadata['itemList']
    elif 'metadata_items' in metadata:
        # The type table does not change while scanning, so look it up once.
        item_types = metadata['metadata_items']
        for key in metadata:
            if key in item_types and item_types[key] == 'EventStreamInfo_p3':
                # A stream entry without an item list is reported as "not
                # found" (None) instead of raising KeyError.
                item_list = metadata[key].get('itemList')
                break

    if item_list is None:
        return None

    if layout is None:
        return item_list

    if layout == '#join':
        # Pairs with an empty first element are dropped in this layout only.
        return [k + '#' + v for k, v in item_list if k]

    if layout == 'dict':
        grouped = {}
        for k, v in item_list:
            grouped.setdefault(k, []).append(v)
        return grouped

    # Unrecognised layout: keep the historical behaviour of returning None.
    return None
1799
1800
def dataheader_nentries(infile):
    """Extract number of entries from DataHeader.

    The DataHeader object is looked up under both the TTree name and the
    RNTuple name taken from PoolOpts; the first one found determines the
    result.

    infile  ROOT TFile object or filename string (a non-TFile argument is
            opened with ROOT.TFile.Open)
    return  Number of entries as returned by DataHeader object in infile,
            None in absence of DataHeader object
    raises  RuntimeError if the object is not a TTree and ROOT is older than
            6.31/01; NotImplementedError if the object is neither a TTree
            nor an RNTuple
    """
    import ROOT
    from PyUtils.PoolFile import PoolOpts

    if not isinstance(infile, ROOT.TFile):
        infile = ROOT.TFile.Open(infile)

    header_names = {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}
    for name in header_names:
        obj = infile.Get(name)
        msg.debug(f"dataheader_nentries: {name=}, {obj=}, {type(obj)=}")
        if not obj:
            continue

        if isinstance(obj, ROOT.TTree):
            return obj.GetEntriesFast()

        # check early to avoid scary ROOT read errors
        if ROOT.gROOT.GetVersionInt() < 63100:
            raise RuntimeError("ROOT ver. 6.31/01 or greater needed to read RNTuple files")

        if not isRNTuple(obj):
            raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")

        # Try the ROOT.Experimental location of RNTupleReader first and fall
        # back to the top-level ROOT one when that raises AttributeError.
        try:
            return ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
        except AttributeError:
            return ROOT.RNTupleReader.Open(obj).GetNEntries()

    return None
1831
def get_meta_filter(mode="lite", meta_key_filter=None) -> dict:
    """Build the metadata filter used for the given reading mode.

    The result maps a metadata key (or '*') to a class-name pattern.

    * 'lite':   a small fixed selection, which differs depending on
                isGaudiEnv().
    * 'peeker': trigger menu, CutBookkeepers, FileMetaData and TruthMetaData
                entries, plus IOV containers and EventStreamInfo when
                isGaudiEnv() is true.
    * 'full' / 'iov': every key listed in meta_key_filter, each accepting
                any class ('*'); an empty dictionary when meta_key_filter is
                empty or None.

    Any other mode yields an empty dictionary.
    """
    iov_type = 'IOVMetaDataContainer_p1'

    if mode == 'lite':
        if isGaudiEnv():
            return {
                '/TagInfo': iov_type,
                'IOVMetaDataContainer_p1__TagInfo': iov_type,
                '*': 'EventStreamInfo_p*',
            }
        return {
            'FileMetaData': '*',
            'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
        }

    if mode == 'peeker':
        menu_type = 'DataVector<xAOD::TriggerMenu_v1>'
        menu_aux_type = 'xAOD::TriggerMenuAuxContainer_v1'
        json_type = 'DataVector<xAOD::TriggerMenuJson_v1>'
        json_aux_type = 'xAOD::TriggerMenuJsonAuxContainer_v1'
        json_keys = tuple(
            'TriggerMenuJson_' + part
            for part in ('HLT', 'HLTMonitoring', 'HLTPS', 'L1', 'L1PS')
        )

        # R2 trigger metadata format (deprecated): AOD keys, then ESD keys
        selection = {
            'TriggerMenu': menu_type,
            'TriggerMenuAux.': menu_aux_type,
            menu_type + '_TriggerMenu': menu_type,
            menu_aux_type + '_TriggerMenuAux.': menu_aux_type,
        }

        # R3 trigger metadata format AOD
        for json_key in json_keys:
            selection[json_key] = json_type
            selection[json_key + 'Aux.'] = json_aux_type

        selection.update({
            'CutBookkeepers': 'xAOD::CutBookkeeperContainer_v1',
            'CutBookkeepersAux.': 'xAOD::CutBookkeeperAuxContainer_v1',
            'FileMetaData': '*',
            'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
            'TruthMetaData': '*',
            'TruthMetaDataAux.': 'xAOD::TruthMetaDataAuxContainer_v1',
        })

        # R3 trigger metadata format ESD: keys are prefixed with the class name
        for json_key in json_keys:
            selection[json_type + '_' + json_key] = json_type
            selection[json_aux_type + '_' + json_key + 'Aux.'] = json_aux_type

        if isGaudiEnv():
            selection.update({
                '/TagInfo': iov_type,
                'IOVMetaDataContainer_p1__TagInfo': iov_type,
                '/Simulation/Parameters': iov_type,
                '/Digitization/Parameters': iov_type,
                '/EXT/DCS/MAGNETS/SENSORDATA': iov_type,
                '*': 'EventStreamInfo_p*',
            })
        return selection

    if mode in ('full', 'iov') and meta_key_filter:
        return {wanted: '*' for wanted in meta_key_filter}

    return {}
1906
def denormalize_metadata_types(metadata_dict):
    """
    Convert canonical/C++ STL types in the metadata_items dictionary back to their
    ROOT equivalents for backward compatibility.
      - 'float' => 'Float_t'
      - 'char' => 'Char_t'
      - 'std::string' => 'string'
      - 'std::uint32_t' => 'UInt_t'
      - 'xAOD::FileMetaData_v1' => 'FileMetaData'
      (add more as needed)

    A value is converted when it is exactly one of the names above, or when
    it ends with '.<name>', in which case only that last component is
    replaced (e.g. 'X.float' => 'X.Float_t'). Any other value, including
    non-string values, is copied unchanged.

    :param metadata_dict: mapping of metadata key to type name
    :return: a new dictionary with the same keys; the input is not modified
    """
    type_map = {
        "float": "Float_t",
        "char": "Char_t",
        "std::string": "string",
        "std::uint32_t": "UInt_t",
        "xAOD::FileMetaData_v1": "FileMetaData",
    }
    denormalized = {}
    for k, v in metadata_dict.items():
        new_v = v
        if isinstance(v, str):
            if v in type_map:
                new_v = type_map[v]
            else:
                # None of the source names contains a '.', so the text after
                # the last '.' is the only candidate for a suffix match.
                prefix, dot, last = v.rpartition(".")
                if dot and last in type_map:
                    new_v = prefix + "." + type_map[last]
        denormalized[k] = new_v
    return denormalized
1935
1936
def should_keep_meta(normalizedName, typeName, meta_filter):
    """
    Decide whether a metadata object passes meta_filter.

    An empty filter keeps everything. Otherwise the object is kept when at
    least one filter entry accepts it: the entry's key (with '/' replaced by
    '_') must be contained in normalizedName (same replacement) or be '*',
    and typeName must match the entry's class pattern under fnmatchcase.
    Entries whose key contains 'CutBookkeepers' additionally require the key
    to equal normalizedName exactly.
    """
    if len(meta_filter) == 0:
        return True

    flat_name = normalizedName.replace("/", "_")

    for wanted_key, wanted_type in meta_filter.items():
        key_matches = wanted_key.replace("/", "_") in flat_name or wanted_key == "*"
        if not (key_matches and fnmatchcase(typeName, wanted_type)):
            continue
        # CutBookkeepers entries only count on an exact name match; a failed
        # exact match does not stop the search through the remaining entries.
        if "CutBookkeepers" not in wanted_key or wanted_key == normalizedName:
            return True

    return False
STL class.
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:312
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179
promote_keys(meta_dict, mode)
_extract_fields_esi(value)
denormalize_metadata_types(metadata_dict)
_extract_iov_for_run(iov_container, run)
_extract_fields_cbk(interface=None, aux=None)
dataheader_nentries(infile)
should_keep_meta(normalizedName, typeName, meta_filter)
_extract_fields_fmd(interface=None, aux=None)
_get_attribute_val(iov_container, attr_name, attr_idx)
_extract_fields_ef(value)
make_lite(meta_dict)
_extract_fields_iovmdc(value)
_extract_iov_detailed(iov_container)
_extract_fields_iov(iov_container, idx_range)
_extract_fields_tmd(interface=None, aux=None)
_convert_value(value, aux=None)
convert_itemList(metadata, layout)
_extract_fields_triggermenujson(interface, aux)
_get_pfn(filename)
read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition MetaReader.py:71
_convert_event_type_user_type(value)
_read_guid(filename)
make_peeker(meta_dict)
_convert_event_type_bitmask(value)
dict get_meta_filter(mode="lite", meta_key_filter=None)
_extract_fields_triggermenu(interface, aux)