71 unique_tag_info_values = True, ignoreNonExistingLocalFiles=False):
72 """
73 This tool is independent of Athena framework and returns the metadata from a given file.
74 :param filenames: the input file from which metadata needs to be extracted.
75 :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files.
76 :param mode: if true, will return all metadata associated with the filename. By default, is false and this will
77 return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
78 :return: a dictionary of metadata for the given input file.
79 """
80
81
82 global _gbl_mode
83 _gbl_mode = mode
84
85 from RootUtils import PyROOTFixes
86
87
88 if isinstance(filenames, str):
89 filenames = [filenames]
90
91
92 if file_type is not None:
93 if file_type not in ('POOL', 'BS'):
94 raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
95 else:
96 msg.info('Forced file_type: {0}'.format(file_type))
97
98
99 if mode not in ('tiny', 'lite', 'full', 'peeker', 'iov'):
100 raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker", "iov" or "full"')
101
102 if meta_key_filter is None:
103 meta_key_filter = []
104
105
106 if not isGaudiEnv():
107 if mode in ('full', 'iov'):
108 raise NameError('The following modes are not available in AnalysisBase: "iov" and "full"')
109
110 msg.info('Current mode used: {0}'.format(mode))
111 msg.info('Current filenames: {0}'.format(filenames))
112
113 if mode != 'full' and mode !='iov' and len(meta_key_filter) > 0:
114 raise NameError('It is possible to use the meta_key_filter option only for full mode')
115 if meta_key_filter:
116 msg.info('Filter used: {0}'.format(meta_key_filter))
117
118
119 meta_dict = {}
120
121
122 for filename in filenames:
123 meta_dict[filename] = {}
124 current_file_type = None
125
126 if not file_type:
127 if os.path.isfile(filename):
128
129 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename):
130 msg.warn('Ignoring not accessible file: {}'.format(filename))
131 continue
132
133 with open(filename, 'rb') as binary_file:
134 magic_file = binary_file.read(4)
135
136 if magic_file == 'root' or magic_file == b'root':
137 current_file_type = 'POOL'
138 meta_dict[filename]['file_type'] = 'POOL'
139
140 elif Project.determine() in (
141 Project.AnalysisBase, Project.AthAnalysis):
142 raise RuntimeError(
143 f"{filename} is not a ROOT file, assumed bytestream"
144 ", this is not supported in Analysis releases")
145 else:
146 current_file_type = 'BS'
147 meta_dict[filename]['file_type'] = 'BS'
148
149
150 meta_dict[filename]['file_size'] = os.path.getsize(filename)
151
152
153 else:
154 if regex_BS_files.match(filename):
155 current_file_type = 'BS'
156 meta_dict[filename]['file_type'] = 'BS'
157 else:
158 current_file_type = 'POOL'
159 meta_dict[filename]['file_type'] = 'POOL'
160
161
162 meta_dict[filename]['file_size'] = None
163
164 else:
165 current_file_type = file_type
166
167
168 if current_file_type == 'POOL':
169
170 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename):
171 msg.warn('Ignoring not accessible file: {}'.format(filename))
172 continue
173
174 import ROOT
175
176 current_file = ROOT.TFile.Open( _get_pfn(filename) )
177
178
179 from PyUtils.PoolFile import PoolOpts
180 collectionTree = current_file.Get(PoolOpts.TTreeNames.EventData)
181 if isinstance(collectionTree, ROOT.TTree):
182 meta_dict[filename]['auto_flush'] = collectionTree.GetAutoFlush()
183
184
185 meta_dict[filename]['file_guid'] = _read_guid(filename)
186
187
188 meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
189 meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()
190
191 if (
192 isRNTuple(md:=current_file.Get(PoolOpts.RNTupleNames.MetaData))
193 and mode != "tiny"
194 ):
195 msg.warning(
196 "Reading in-file metadata from RNTuple is currently of limited support"
197 )
198 meta_dict[filename]["metadata_items"] = {}
199
200 try:
201 from ROOT import RNTupleReader
202 except ImportError:
203 from ROOT.Experimental import RNTupleReader
204
205 reader = RNTupleReader.Open(md)
206 entry = reader.CreateEntry()
207 reader.LoadEntry(0, entry)
208 auxes = {}
209 classes_with_aux = {
210 "xAOD::FileMetaData_v1",
211 "xAOD::FileMetaDataAuxInfo_v1",
212 "xAOD::TriggerMenuJsonAuxContainer_v1",
213 "DataVector<xAOD::TriggerMenuJson_v1>",
214 "xAOD::TruthMetaDataAuxContainer_v1",
215 "DataVector<xAOD::TruthMetaData_v1>",
216 "xAOD::CutBookkeeperContainer_v1",
217 "xAOD::CutBookkeeperAuxContainer_v1",
218 "xAOD::LumiBlockRangeAuxContainer_v1",
219 "DataVector<xAOD::LumiBlockRange_v1>",
220 }
221
222 dynamic_fmd_items = {}
223
224 meta_filter = get_meta_filter(mode, meta_key_filter)
225
226 for field in reader.GetDescriptor().GetTopLevelFields():
227 normalizedName = field.GetFieldName()
228 if "index_ref" in normalizedName:
229 continue
230 if regexIOVMetaDataContainer.match(field.GetTypeName()):
231
232
233 normalizedName = (
234 field.GetFieldName()
235 .
replace(
"IOVMetaDataContainer_p1_",
"")
237 )
238 meta_dict[filename]["metadata_items"][normalizedName] = (
239 "IOVMetaDataContainer"
240 )
241 elif regexByteStreamMetadataContainer.match(field.GetTypeName()):
242 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
243 "ByteStreamMetadataContainer"
244 )
245 elif regexEventStreamInfo.match(field.GetTypeName()):
246 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
247 "EventStreamInfo"
248 )
249 elif regexXAODFileMetaData.match(field.GetTypeName()):
250 meta_dict[filename]["metadata_items"][
251 field.GetFieldName().
replace(
"xAOD__",
"xAOD::")
252 ] = field.GetTypeName()
253 elif regexXAODFileMetaDataAuxDyn.match(
254 normalizedName := field.GetFieldName()
257 ):
258 result = (
259 False
260 if entry[field.GetFieldName()] == "\x00"
261 else entry[field.GetFieldName()]
262 )
263 dynamic_fmd_items[normalizedName.split(".")[1]] = result
264 meta_dict[filename]["metadata_items"][normalizedName] = (
265 field.GetTypeName()
266 )
267 continue
268 elif regexXAODFileMetaDataAux.match(field.GetTypeName()):
269 meta_dict[filename]["metadata_items"][
270 field.GetFieldName()
273 ] = field.GetTypeName()
274 elif regexXAODTruthMetaData.match(field.GetTypeName()):
275 meta_dict[filename]["metadata_items"][
276 field.GetFieldName()
278 .
replace(
"DataVector_",
"DataVector<")
280 ] = "TruthMetaData"
281 elif regexXAODTruthMetaDataAux.match(field.GetTypeName()):
282 meta_dict[filename]["metadata_items"][
283 field.GetFieldName()
286 ] = field.GetTypeName()
287 elif regexXAODEventFormat.match(field.GetTypeName()):
288 meta_dict[filename]["metadata_items"][
289 field.GetFieldName().
replace(
"xAOD__",
"xAOD::")
290 ] = field.GetTypeName()
291 elif regexXAODTriggerMenuJson.match(field.GetTypeName()):
292 meta_dict[filename]["metadata_items"][
293 field.GetFieldName()
295 .
replace(
"DataVector_",
"DataVector<")
296 .
replace(
"__Trigger",
">_Trigger")
297 ] = field.GetTypeName()
298 elif regexXAODTriggerMenuJsonAux.match(field.GetTypeName()):
299 meta_dict[filename]["metadata_items"][
300 field.GetFieldName()
303 ] = field.GetTypeName()
304 elif regexXAODCutBookkeeperContainer.match(field.GetTypeName()):
305 meta_dict[filename]["metadata_items"][
306 field.GetFieldName()
308 .
replace(
"DataVector_",
"DataVector<")
309 .
replace(
"__CutBookkeeper",
">_CutBookkeeper")
310 ] = field.GetTypeName()
311 elif regexXAODCutBookkeeperContainerAux.match(field.GetTypeName()):
312 meta_dict[filename]["metadata_items"][
313 field.GetFieldName()
316 ] = field.GetTypeName()
317 else:
318 meta_dict[filename]["metadata_items"][
319 field.GetFieldName().
replace(
"Aux:",
"Aux.")
320 ] = field.GetTypeName()
321
322 if field.GetTypeName() in classes_with_aux:
323
324 auxes[field.GetFieldName()] = field.GetTypeName()
325 continue
326
327 if not should_keep_meta(
328 normalizedName, field.GetTypeName(), meta_filter
329 ):
330 continue
331
332 try:
333 meta_dict[filename][normalizedName] = _convert_value(
334 entry[field.GetFieldName()]
335 )
336 except KeyError:
337 msg.warning(f"missing type {field.GetTypeName()}")
338
339 meta_dict[filename]["metadata_items"] = denormalize_metadata_types(
340 meta_dict[filename]["metadata_items"]
341 )
342
343 def _get_aux_base(aux_key: str) -> str:
344
345 key = aux_key
346 key = key.replace("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
347 key = key.replace("xAOD__FileMetaDataAuxInfo_v1_", "")
348 key = key.replace("xAOD__TruthMetaDataAuxContainer_v1_", "")
349
350 if key.endswith("Aux:"):
351 key = key[:-4]
352 elif key.endswith("Aux"):
353 key = key[:-3]
354
355 key = key.strip("_:")
356 return key
357
358 def _get_main_base(main_key: str) -> str:
359 main_base = main_key
360
361 if main_key.startswith("DataVector_xAOD__TriggerMenuJson_v1__"):
362 main_base = main_key.replace(
363 "DataVector_xAOD__TriggerMenuJson_v1__", ""
364 )
365
366 elif main_key.startswith("xAOD__FileMetaData_v1_"):
367 main_base = main_key.replace("xAOD__FileMetaData_v1_", "")
368
369 elif main_key.startswith("DataVector_xAOD__TruthMetaData_v1__"):
370 main_base = main_key.replace(
371 "DataVector_xAOD__TruthMetaData_v1__", ""
372 )
373 return main_base
374
def _find_associated_pairs(auxes: dict) -> list[tuple[str, str]]:
    """Match auxiliary-store keys with keys sharing the same base name.

    Keys containing ``"Aux"`` are indexed by ``_get_aux_base``; every key
    (whether or not it contains ``"Aux"``) is indexed by ``_get_main_base``
    when that base is non-empty. A pair is produced for each aux base that
    also appears in the main index.

    :param auxes: mapping whose keys are the field names to pair up; the
        values are not inspected here.
    :return: list of ``(aux_key, main_key)`` tuples, in the order the aux
        bases were first encountered.
    """
    # On duplicate base names the later key wins, as with plain assignment.
    aux_by_base = {
        _get_aux_base(name): name for name in auxes if "Aux" in name
    }
    main_by_base = {
        stem: name for name in auxes if (stem := _get_main_base(name))
    }

    return [
        (aux_name, main_by_base[base])
        for base, aux_name in aux_by_base.items()
        if base in main_by_base
    ]
394
395 for pair in _find_associated_pairs(auxes):
396 return_obj = _convert_value(
397 entry[pair[1]],
398 entry[pair[0]],
399 )
400 key = next(
401 (
402 k
403 for k, v in trigger_menu_json_map.items()
404 if v
405 == pair[1]
407 .
replace(
"DataVector_",
"DataVector<")
408 .
replace(
"__Trigger",
">_Trigger")
409 ),
410 auxes[pair[0]],
411 )
412
413 try:
414 key = (
415 key.replace("xAOD__", "xAOD::")
416 if key.count("_") <= 1
417 else key.replace("xAOD__", "xAOD::").rsplit("_", 2)[0]
418 )
419 except IndexError:
420 pass
421
422 if not should_keep_meta(
423 pair[0]
425 .
replace(
"DataVector_",
"DataVector<")
426 .
replace(
"__Trigger",
">_Trigger")
428 key,
429 meta_filter,
430 ):
431 continue
432
433 if "TriggerMenuJson" in pair[0]:
434 if "RAWTriggerMenuJson" in return_obj:
435 key = (
436 pair[1]
437 if pair[0].startswith("Trigger")
438 else trigger_menu_json_map[pair[0]]
439 )
440 meta_dict[filename][key] = return_obj["RAWTriggerMenuJson"]
441 del return_obj["RAWTriggerMenuJson"]
442 if "TriggerConfigInfo" not in meta_dict[filename]:
443 meta_dict[filename]["TriggerConfigInfo"] = {}
444 if "dbkey" in return_obj:
445 meta_dict[filename]["TriggerConfigInfo"][
447 ] = {"key": return_obj["dbkey"], "name": return_obj["name"]}
448 del return_obj["dbkey"]
449 del return_obj["name"]
450 if "TriggerMenu" not in meta_dict[filename]:
451 meta_dict[filename]["TriggerMenu"] = {}
452 meta_dict[filename]["TriggerMenu"].update(return_obj)
453 elif "FileMetaData" in pair[0]:
454 if "FileMetaData" not in meta_dict[filename]:
455 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
456 meta_dict[filename]["FileMetaData"].update(return_obj)
457 elif "TruthMetaData" in pair[0]:
458 if pair == ("TruthMetaDataAux:", "TruthMetaData"):
459 if "TruthMetaData" not in meta_dict[filename]:
460 meta_dict[filename]["TruthMetaData"] = {}
461 meta_dict[filename]["TruthMetaData"].update(return_obj)
462 else:
463
464 meta_dict[filename][
465 pair[1]
467 .
replace(
"DataVector_",
"DataVector<")
469 ] = {}
470 meta_dict[filename][
471 pair[0]
474 ] = {}
475 elif pair == ("CutBookkeepersAux:", "CutBookkeepers"):
476 meta_dict[filename]["CutBookkeepers"] = return_obj
477
478 msg.debug(f"Read metadata from RNTuple: {meta_dict[filename]}")
479
480 else:
481
482 if mode != 'tiny':
483
484 metadata_tree = current_file.Get('MetaData')
485
486 metadata_branches = metadata_tree.GetListOfBranches()
487 nr_of_branches = metadata_branches.GetEntriesFast()
488
489
490 meta_dict[filename]['metadata_items'] = {}
491
492 meta_filter = get_meta_filter(mode, meta_key_filter)
493
494
495 persistent_instances = {}
496 dynamic_fmd_items = {}
497
498
499 if not isGaudiEnv():
500 metadata_tree.SetBranchStatus("*", False)
501
502 for i in range(0, nr_of_branches):
503 branch = metadata_branches.At(i)
504 name = branch.GetName()
505 if name == 'index_ref':
506
507 continue
508
509 class_name = branch.GetClassName()
510
511 if regexIOVMetaDataContainer.match(class_name):
512 name = name.replace(
'IOVMetaDataContainer_p1_',
'').
replace(
'_',
'/')
513
514 if regexIOVMetaDataContainer.match(class_name):
515 meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
516 elif regexByteStreamMetadataContainer.match(class_name):
517 meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
518 elif regexEventStreamInfo.match(class_name):
519 meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
520 elif regexXAODFileMetaData.match(class_name):
521 meta_dict[filename]['metadata_items'][name] = 'FileMetaData'
522 elif regexXAODTruthMetaData.match(class_name):
523 meta_dict[filename]['metadata_items'][name] = 'TruthMetaData'
524 else:
525 type_name = class_name
526 if not type_name:
527 try:
528 type_name = branch.GetListOfLeaves()[0].GetTypeName()
529 except IndexError:
530 pass
531 meta_dict[filename]['metadata_items'][name] = type_name
532
533 if len(meta_filter) > 0:
534 keep = False
535 for filter_key, filter_class in meta_filter.items():
536 if (filter_key.replace('/', '_') in name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
537 if 'CutBookkeepers' in filter_key:
538 keep = filter_key == name
539 if keep:
540 break
541 else:
542 keep = True
543 break
544
545 if not keep:
546 continue
547 else:
548
549 if 'CutBookkeepers' in name and name not in ['CutBookkeepers', 'CutBookkeepersAux.']:
550 continue
551
552 if not isGaudiEnv():
553 metadata_tree.SetBranchStatus(f"{name}*", True)
554
555
556 if regexEventStreamInfo.match(class_name):
557 if class_name.endswith('_p1'):
558 persistent_instances[name] = ROOT.EventStreamInfo_p1()
559 elif class_name.endswith('_p2'):
560 persistent_instances[name] = ROOT.EventStreamInfo_p2()
561 else:
562 persistent_instances[name] = ROOT.EventStreamInfo_p3()
563 elif regexIOVMetaDataContainer.match(class_name):
564 persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
565 elif regexXAODEventFormat.match(class_name):
566 persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
567 elif regexXAODTriggerMenu.match(class_name) and _check_project() not in ['AthGeneration']:
568 persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
569 elif regexXAODTriggerMenuAux.match(class_name) and _check_project() not in ['AthGeneration']:
570 persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()
571 elif regexXAODTriggerMenuJson.match(class_name) and _check_project() not in ['AthGeneration']:
572 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonContainer_v1()
573 elif regexXAODTriggerMenuJsonAux.match(class_name) and _check_project() not in ['AthGeneration']:
574 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonAuxContainer_v1()
575 elif regexXAODCutBookkeeperContainer.match(class_name):
576 persistent_instances[name] = ROOT.xAOD.CutBookkeeperContainer_v1()
577 elif regexXAODCutBookkeeperContainerAux.match(class_name):
578 persistent_instances[name] = ROOT.xAOD.CutBookkeeperAuxContainer_v1()
579 elif regexXAODFileMetaData.match(class_name):
580 persistent_instances[name] = ROOT.xAOD.FileMetaData_v1()
581 elif regexXAODFileMetaDataAux.match(class_name):
582 persistent_instances[name] = ROOT.xAOD.FileMetaDataAuxInfo_v1()
583 elif regexXAODTruthMetaData.match(class_name):
584 persistent_instances[name] = ROOT.xAOD.TruthMetaDataContainer_v1()
585 elif regexXAODTruthMetaDataAux.match(class_name):
586 persistent_instances[name] = ROOT.xAOD.TruthMetaDataAuxContainer_v1()
587
588 if name in persistent_instances:
589 branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
590
591
592 dynamicFMD = regexXAODFileMetaDataAuxDyn.match(name)
593 if dynamicFMD:
594 dynamicName = dynamicFMD.group().
split(
'.')[-1]
595 dynamicType = regex_cppname.match(class_name)
596 if dynamicType:
597
598 dynamic_fmd_items[dynamicName] = ROOT.std.string()
599 branch.SetAddress(ROOT.AddressOf(dynamic_fmd_items[dynamicName]))
600 else:
601 dynamic_fmd_items[dynamicName] = None
602
603
604 metadata_tree.GetEntry(0)
605
606
607 for key in dynamic_fmd_items:
608 if dynamic_fmd_items[key] is None:
609 try:
610 if key.startswith("is"):
611
612 dynamic_fmd_items[key] = getattr(metadata_tree, key) != '\x00'
613 else:
614
615 dynamic_fmd_items[key] = getattr(metadata_tree, key)
616 except AttributeError:
617
618 pass
619 else:
620
621 dynamic_fmd_items[key] = str(dynamic_fmd_items[key])
622
623
624 if meta_key_filter:
625 meta_dict[filename] = {}
626
627
628 for name, content in persistent_instances.items():
629 key = name
630 if hasattr(content, 'm_folderName'):
631 key = content.m_folderName
632
633
634 has_r3_trig_meta = ('TriggerMenuJson_HLT' in persistent_instances or 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT' in persistent_instances)
635 aux = None
636 if key.startswith('TriggerMenuJson_') and not key.endswith('Aux.'):
637 aux = persistent_instances[key+'Aux.']
638 elif key.startswith('DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_') and not key.endswith('Aux.'):
639 menuPart = key.split('_')[-1]
640 aux = persistent_instances['xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_'+menuPart+'Aux.']
641 elif key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta:
642 aux = persistent_instances['TriggerMenuAux.']
643 elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta:
644 aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
645 elif (key == 'CutBookkeepers'
646 and 'CutBookkeepersAux.' in persistent_instances):
647 aux = persistent_instances['CutBookkeepersAux.']
648 elif key == 'CutBookkeepersAux.':
649 continue
650 elif (key == 'FileMetaData'
651 and 'FileMetaDataAux.' in persistent_instances):
652 aux = persistent_instances['FileMetaDataAux.']
653 elif (key == 'xAOD::FileMetaData_v1_FileMetaData'
654 and 'xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.' in persistent_instances):
655 aux = persistent_instances['xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.']
656 elif (key == 'TruthMetaData'
657 and 'TruthMetaDataAux.' in persistent_instances):
658 aux = persistent_instances['TruthMetaDataAux.']
659 elif key == 'TruthMetaDataAux.':
660 continue
661 elif 'Menu' in key and key.endswith('Aux.'):
662 continue
663
664 return_obj = _convert_value(content, aux)
665
666 if 'TriggerMenuJson' in key or ('TriggerMenu' in key and not has_r3_trig_meta):
667 if 'RAWTriggerMenuJson' in return_obj:
668 meta_dict[filename][key] = return_obj['RAWTriggerMenuJson']
669 del return_obj['RAWTriggerMenuJson']
670 if 'TriggerConfigInfo' not in meta_dict[filename]:
671 meta_dict[filename]['TriggerConfigInfo'] = {}
672 if 'dbkey' in return_obj:
673 meta_dict[filename]['TriggerConfigInfo'][key.split('_')[-1]] = {
674 'key' : return_obj['dbkey'],
675 'name': return_obj['name']
676 }
677 del return_obj['dbkey']
678 del return_obj['name']
679 if 'TriggerMenu' not in meta_dict[filename]:
680 meta_dict[filename]['TriggerMenu'] = {}
681 meta_dict[filename]['TriggerMenu'].update(return_obj)
682 elif "FileMetaData" in key:
683 if "FileMetaData" not in meta_dict[filename]:
684 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
685 meta_dict[filename]["FileMetaData"].update(return_obj)
686 else:
687 meta_dict[filename][key] = return_obj
688
689 try:
690
691 esi_dict = next(key for key, value in meta_dict[filename].items()
692 if isinstance(value, dict) and "numberOfEvents" in value and
693 meta_dict[filename]["metadata_items"][key] == "EventStreamInfo")
694 msg.debug(f"{esi_dict=}")
695 meta_dict[filename]["nentries"] = meta_dict[filename][esi_dict]["numberOfEvents"]
696 except StopIteration as err:
697 msg.debug(f"Caught {err=}, {type(err)=}, falling back on opening the DataHeader"
698 " Container to read the number of entries")
699 meta_dict[filename]['nentries'] = dataheader_nentries(current_file)
700 msg.debug(f"{meta_dict[filename]['nentries']=}")
701
702 if unique_tag_info_values and mode=='iov':
703 unique_tag_info_values = False
704 msg.info('disabling "unique_tag_info_values" option for "iov" mode')
705
706
707
708
709
710
711 if unique_tag_info_values:
712 msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
713 'This is a workaround to remove all duplicate values from "/TagInfo" key')
714 if '/TagInfo' in meta_dict[filename]:
715 for key, value in meta_dict[filename]['/TagInfo'].items():
716 if isinstance(value, list) and value:
717 if len(unique_values :=
set(value)) > 1:
718 msg.warn(
719 f"Found multiple values for {key}: {value}. "
720 "Looking for possible duplicates."
721 )
722 maybe_ok = False
723 if key == "AMITag":
724
725 unique_amitags =
set()
726 for amitags in unique_values:
727 unique_amitags.add(
728 "_".join({tag for tag in amitags.split("_") if tag})
729 )
730 if len(unique_amitags) == 1:
731 maybe_ok = True
732 elif key == "beam_energy":
733
734 unique_energies =
set()
735 for energy in unique_values:
736 try:
737 energy = int(energy)
738 except ValueError:
739 try:
740 energy = float(energy)
741 except ValueError:
742 pass
743 unique_energies.add(energy)
744 if len(unique_energies) == 1:
745 maybe_ok = True
746 elif key in ["AtlasRelease", "IOVDbGlobalTag", "AODFixVersion"]:
747 maybe_ok = True
748 if maybe_ok:
749 msg.warn(
750 f"Multiple values for {key} may mean the same, or "
751 "the input file was produced in multi-step job. "
752 f"Ignoring all but the first entry: {key} = {value[0]}"
753 )
754 else:
755 raise ValueError(
756 f"{key} from /TagInfo contains more than 1 unique value: {value}"
757 )
758
759 meta_dict[filename]['/TagInfo'][key] = value[0]
760
761 if promote is None:
762 promote = mode == 'lite' or mode == 'peeker'
763
764
765 if mode == 'lite':
766 meta_dict = make_lite(meta_dict)
767
768 if mode == 'peeker':
769 meta_dict = make_peeker(meta_dict)
770
771 if promote:
772 meta_dict = promote_keys(meta_dict, mode)
773
774
775 if not isGaudiEnv():
776 if isinstance(collectionTree, ROOT.TTree):
777 meta_dict[filename]['itemList'] = [ (b.GetClassName(), b.GetName()) for b in collectionTree.GetListOfBranches() ]
778
779
780 elif current_file_type == 'BS':
781
782 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and not os.path.isfile(filename):
783 msg.warn('Ignoring not accessible file: {}'.format(filename))
784 continue
785
786 import eformat
787
788
789 bs = eformat.istream(filename)
790 meta_dict[filename]['nentries'] = bs.total_events
791
792
793 data_reader = eformat.EventStorage.pickDataReader(filename)
794 assert data_reader, 'problem picking a data reader for file [%s]' % filename
795
796
797 meta_dict[filename]['auto_flush'] = 1
798
799 if hasattr(data_reader, 'GUID'):
800 meta_dict[filename]['file_guid'] = data_reader.GUID()
801
802
803 meta_dict[filename]['file_comp_alg'] = 1
804 meta_dict[filename]['file_comp_level'] = 1
805
806
807
808
809 if mode != "tiny":
810 bs_metadata = {}
811
812 for md in data_reader.freeMetaDataStrings():
813 if md.startswith('Event type:'):
814 k = 'eventTypes'
815 v = []
816 if 'is sim' in md:
817 v.append('IS_SIMULATION')
818 else:
819 v.append('IS_DATA')
820
821 if 'is atlas' in md:
822 v.append('IS_ATLAS')
823 else:
824 v.append('IS_TESTBEAM')
825
826 if 'is physics' in md:
827 v.append('IS_PHYSICS')
828 else:
829 v.append('IS_CALIBRATION')
830
831 bs_metadata[k] = tuple(v)
832
833 elif md.startswith('GeoAtlas:'):
834 k = 'geometry'
835 v = md.split(
'GeoAtlas:')[1].
strip()
836 bs_metadata[k] = v
837
838 elif md.startswith('IOVDbGlobalTag:'):
839 k = 'conditions_tag'
840 v = md.split(
'IOVDbGlobalTag:')[1].
strip()
841 bs_metadata[k] = v
842
843 elif '=' in md:
844 k, v = md.split('=', 1)
845 bs_metadata[k] = v
846
847 bs_metadata['detectorMask'] = data_reader.detectorMask()
848 bs_metadata['runNumbers'] = data_reader.runNumber()
849 bs_metadata['lumiBlockNumbers'] = data_reader.lumiblockNumber()
850 bs_metadata['projectTag'] = data_reader.projectTag()
851 bs_metadata['stream'] = data_reader.stream()
852
853 beamTypeNbr= data_reader.beamType()
854
855
856
857
858 if (beamTypeNbr==0): bs_metadata['beamType'] = 'cosmics'
859 elif (beamTypeNbr==1 or beamTypeNbr==2): bs_metadata['beamType'] = 'collisions'
860 else: bs_metadata['beamType'] = 'unknown'
861
862 bs_metadata['beamEnergy'] = data_reader.beamEnergy()
863
864 meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
865 meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
866 meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
867 meta_dict[filename]['project_name'] = bs_metadata.get('projectTag', None)
868
869
870 meta_dict[filename]['detectorMask'] = [bs_metadata.get('detectorMask', None)]
871 meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
872 meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
873 meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
874 meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
875 meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
876
877 if not data_reader.good():
878
879 meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
880 meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
881
882 msg.debug(f"{meta_dict[filename]=}")
883 msg.debug(f"{len(bs)=}")
884 if len(bs):
885 evt = bs[0]
886 try:
887 evt.check()
888 meta_dict[filename]['processingTags'] = [tag.name for tag in evt.stream_tag()]
889 meta_dict[filename]['evt_number'] = [evt.global_id()]
890 meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
891
892
893 if meta_dict[filename]['lumiBlockNumbers'] == [0]:
894 msg.debug('Taking the luminosity block info from the first event (%i)', evt.lumi_block())
895 meta_dict[filename]['lumiBlockNumbers'] = [evt.lumi_block()]
896
897
898 if meta_dict[filename]['runNumbers'] == [0]:
899 msg.debug('Taking the run number info from the first event (%i)', evt.run_no())
900 meta_dict[filename]['runNumbers'] = [evt.run_no()]
901 except RuntimeError as err:
902 msg.error("Issue while reading the first event of BS file %r: %r", filename, err)
903 else:
904 msg.debug(f"{meta_dict[filename]=}")
905 else:
906 msg.warn(f"Event-less BS {filename=}, will not read metadata information from the first event")
907
908
909 if len(bs_metadata.get('eventTypes', '')) == 0:
910 evt_type = ['IS_DATA', 'IS_ATLAS']
911 if bs_metadata.get('stream', '').startswith('physics_'):
912 evt_type.append('IS_PHYSICS')
913 elif bs_metadata.get('stream', '').startswith('calibration_'):
914 evt_type.append('IS_CALIBRATION')
915 elif bs_metadata.get('projectTag', '').endswith('_calib'):
916 evt_type.append('IS_CALIBRATION')
917 else:
918 evt_type.append('Unknown')
919
920 meta_dict[filename]['eventTypes'] = evt_type
921
922 if mode == 'full':
923 meta_dict[filename]['bs_metadata'] = bs_metadata
924
925
926 else:
927 msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
928 return None
929
930 return meta_dict
931
932
std::string replace(std::string s, const std::string &s2, const std::string &s3)