ATLAS Offline Software
Loading...
Searching...
No Matches
python.MetaReader Namespace Reference

Functions

 read_metadata (filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
 _check_project ()
 _get_pfn (filename)
 _read_guid (filename)
 _extract_fields (obj)
 _convert_value (value, aux=None)
 _get_attribute_val (iov_container, attr_name, attr_idx)
 _extract_fields_iov (iov_container, idx_range)
 _extract_iov_detailed (iov_container)
 _extract_fields_iovmdc (value)
 _extract_fields_esi (value)
 _extract_fields_ef (value)
 _extract_fields_cbk (interface=None, aux=None)
 _extract_fields_fmd (interface=None, aux=None)
 _extract_fields_tmd (interface=None, aux=None)
 _extract_fields_triggermenu (interface, aux)
 _extract_fields_triggermenujson (interface, aux)
 _convert_event_type_user_type (value)
 _convert_event_type_bitmask (value)
 make_lite (meta_dict)
 make_peeker (meta_dict)
 promote_keys (meta_dict, mode)
 convert_itemList (metadata, layout)
 dataheader_nentries (infile)
dict get_meta_filter (mode="lite", meta_key_filter=None)
 denormalize_metadata_types (metadata_dict)
 should_keep_meta (normalizedName, typeName, meta_filter)

Variables

 msg = logging.getLogger('MetaReader')
 regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')
 regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')
 regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')
 regexXAODCutBookkeeperContainer = re.compile(r'^xAOD::CutBookkeeperContainer(_v\d+)?$')
 regexXAODCutBookkeeperContainerAux = re.compile(r'^xAOD::CutBookkeeperAuxContainer(_v\d+)?$')
 regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')
 regexXAODFileMetaData = re.compile(r'^xAOD::FileMetaData(_v\d+)?$')
 regexXAODFileMetaDataAux = re.compile(r'^xAOD::FileMetaDataAuxInfo(_v\d+)?$')
 regexXAODFileMetaDataAuxDyn = re.compile(r'^(xAOD::)?FileMetaData.*AuxDyn(\.[a-zA-Z0-9]+)?$')
 regexXAODTriggerMenu = re.compile(r'^DataVector<xAOD::TriggerMenu(_v\d+)?>$')
 regexXAODTriggerMenuAux = re.compile(r'^xAOD::TriggerMenuAuxContainer(_v\d+)?$')
 regexXAODTriggerMenuJson = re.compile(r'^DataVector<xAOD::TriggerMenuJson(_v\d+)?>$')
 regexXAODTriggerMenuJsonAux = re.compile(r'^xAOD::TriggerMenuJsonAuxContainer(_v\d+)?$')
 regexXAODTruthMetaData = re.compile(r'^DataVector<xAOD::TruthMetaData(_v\d+)?>$')
 regexXAODTruthMetaDataAux = re.compile(r'^xAOD::TruthMetaDataAuxContainer(_v\d+)?$')
 regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')
 regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')
 regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')
 regex_URI_scheme = re.compile(r'^([A-Za-z0-9\+\.\-]+)\:')
list lite_primary_keys_to_keep
list lite_TagInfo_keys_to_keep
list trigger_keys
dict trigger_menu_json_map

Function Documentation

◆ _check_project()

python.MetaReader._check_project ( )
protected

Definition at line 933 of file MetaReader.py.

933def _check_project():
934 import os
935 if 'AthSimulation_DIR' in os.environ:
936 return 'AthSimulation'
937 if 'AthGeneration_DIR' in os.environ:
938 return 'AthGeneration'
939 return 'Athena'
940
941

◆ _convert_event_type_bitmask()

python.MetaReader._convert_event_type_bitmask ( value)
protected

Definition at line 1436 of file MetaReader.py.

1436def _convert_event_type_bitmask(value):
1437
1438 types = None
1439 for key in value:
1440 if key == 'bit_mask':
1441 val = value[key]
1442
1443 bitmask_length = len(val)
1444
1445 is_simulation = False
1446 is_testbeam = False
1447 is_calibration = False
1448
1449 if bitmask_length > 0: # ROOT.EventType.IS_SIMULATION
1450 is_simulation = val[0]
1451
1452 if bitmask_length > 1: # ROOT.EventType.IS_TESTBEAM
1453 is_testbeam = val[1]
1454
1455 if bitmask_length > 2: # ROOT.EventType.IS_CALIBRATION:
1456 is_calibration = val[2]
1457
1458 types = [
1459 'IS_SIMULATION' if is_simulation else 'IS_DATA',
1460 'IS_TESTBEAM' if is_testbeam else 'IS_ATLAS',
1461 'IS_CALIBRATION' if is_calibration else 'IS_PHYSICS'
1462 ]
1463
1464 value['type'] = types
1465 return value
1466
1467

◆ _convert_event_type_user_type()

python.MetaReader._convert_event_type_user_type ( value)
protected

Definition at line 1429 of file MetaReader.py.

1429def _convert_event_type_user_type(value):
1430 if 'user_type' in value:
1431 items = value['user_type'].split('#')[3:]
1432 for i in range(0, len(items), 2):
1433 value[items[i]] = _convert_value(items[i+1])
1434 return value
1435
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177

◆ _convert_value()

python.MetaReader._convert_value ( value,
aux = None )
protected

Definition at line 1036 of file MetaReader.py.

1036def _convert_value(value, aux = None):
1037 cl=value.__class__
1038 if hasattr(cl, '__cpp_name__'):
1039 result = regex_cppname.match(cl.__cpp_name__)
1040 if result:
1041 cpp_type = result.group(1)
1042 if cpp_type == 'vector' or cpp_type == 'std::vector':
1043 return [_convert_value(val) for val in value]
1044 elif cpp_type == 'set' or cpp_type == 'std::set':
1045 return {_convert_value(val) for val in value}
1046 elif cpp_type == 'pair' or cpp_type == 'std::pair':
1047 return _convert_value(value.first), _convert_value(value.second)
1048
1049 # elif cpp_type == 'long':
1050 # return int(value)
1051
1052 elif cpp_type == 'string' or cpp_type == 'std::string':
1053 return str(value)
1054
1055 elif cl.__cpp_name__ == "_Bit_reference":
1056 return bool(value)
1057
1058 # special case which extracts data in a better format from IOVPayloadContainer_p1 class
1059 elif cl.__cpp_name__ == 'IOVMetaDataContainer_p1':
1060 return _extract_fields_iovmdc(value)
1061
1062 elif cl.__cpp_name__ == 'IOVPayloadContainer_p1':
1063 if _gbl_mode == 'iov':
1064 return _extract_iov_detailed(value)
1065 else:
1066 return _extract_fields_iov( value, range(value.m_attrIndexes.size()) )
1067
1068 elif cl.__cpp_name__ == 'xAOD::EventFormat_v1':
1069 return _extract_fields_ef(value)
1070 elif cl.__cpp_name__ == 'xAOD::CutBookkeeperContainer_v1':
1071 return _extract_fields_cbk(interface=value, aux=aux)
1072 elif cl.__cpp_name__ == 'xAOD::FileMetaData_v1':
1073 return _extract_fields_fmd(interface=value, aux=aux)
1074 elif cl.__cpp_name__ == 'DataVector<xAOD::TruthMetaData_v1>':
1075 return _extract_fields_tmd(interface=value, aux=aux)
1076
1077 elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenu_v1>' :
1078 return _extract_fields_triggermenu(interface=value, aux=aux)
1079
1080 elif cl.__cpp_name__ == 'DataVector<xAOD::TriggerMenuJson_v1>' :
1081 return _extract_fields_triggermenujson(interface=value, aux=aux)
1082
1083 elif (cl.__cpp_name__ == 'EventStreamInfo_p1' or
1084 cl.__cpp_name__ == 'EventStreamInfo_p2' or
1085 cl.__cpp_name__ == 'EventStreamInfo_p3'):
1086 return _extract_fields_esi(value)
1087
1088 elif (cl.__cpp_name__ == 'EventType_p1' or
1089 cl.__cpp_name__ == 'EventType_p3'):
1090 fields = _extract_fields(value)
1091 fields = _convert_event_type_bitmask(fields)
1092 fields = _convert_event_type_user_type(fields)
1093 return fields
1094
1095 elif regex_persistent_class.match(cl.__cpp_name__):
1096 return _extract_fields(value)
1097
1098 return value
1099
1100

◆ _extract_fields()

python.MetaReader._extract_fields ( obj)
protected

Definition at line 1021 of file MetaReader.py.

1021def _extract_fields(obj):
1022 result = {}
1023
1024 for meth in dir(obj):
1025 if not meth.startswith('_'):
1026 if meth.startswith('m_'):
1027
1028 field_name = str(meth)[2:]
1029 field_value = getattr(obj, meth)
1030
1031 result[field_name] = _convert_value(field_value)
1032
1033 return result
1034
1035

◆ _extract_fields_cbk()

python.MetaReader._extract_fields_cbk ( interface = None,
aux = None )
protected
Extract CutBookkeeper content into dictionary

This function takes the CutBookkeeperContainer_v1 and CutBookkeeperAuxContainer_v1 objects.
It makes sure the interface object uses the auxiliary object as store.
    Args:
        interface (CutBookkeeperContainer_v1):     the interface class
        aux       (CutBookkeeperAuxContainer_v1):  auxiliary container object
    Returns
        dict: with the cycle number and last stream

Definition at line 1247 of file MetaReader.py.

1247def _extract_fields_cbk(interface=None, aux=None):
1248 """Extract CutBookkeeper content into dictionary
1249
1250 This function takes the CutBookkeeperContainer_v1 and CutBookkeeperAuxContainer_v1 objects.
1251 It makes sure the the interface object uses the auxiliary object as store.
1252 Args:
1253 interface (CutBookkeeperContainer_v1): the interface class
1254 aux (CutBookkeeperAuxContainer_v1): auxiliary container object
1255 Returns
1256 dict: with the cycle number and last stream
1257 """
1258 if not interface or not aux:
1259 return {}
1260 interface.setStore(aux)
1261
1262 max_cycle = -1
1263 input_stream = ''
1264
1265 for cbk in interface:
1266 current_cycle = int(cbk.cycle())
1267 if current_cycle > max_cycle:
1268 max_cycle = current_cycle
1269 input_stream = str(cbk.inputStream())
1270
1271 result = {
1272 'currentCutCycle': max_cycle,
1273 'currentCutInputStream': input_stream,
1274 }
1275 return result
1276
1277

◆ _extract_fields_ef()

python.MetaReader._extract_fields_ef ( value)
protected

Definition at line 1238 of file MetaReader.py.

1238def _extract_fields_ef(value):
1239 result = {}
1240
1241 for ef_element in value:
1242 result[ef_element.first] = ef_element.second.className()
1243
1244 return result
1245
1246

◆ _extract_fields_esi()

python.MetaReader._extract_fields_esi ( value)
protected

Definition at line 1214 of file MetaReader.py.

def _extract_fields_esi(value):
    """Extract the content of an EventStreamInfo object into a dictionary.

    Args:
        value: object exposing m_eventTypes, m_numberOfEvents,
               m_runNumbers, m_lumiBlockNumbers, m_processingTags and
               m_itemList
    Returns:
        dict: with keys 'eventTypes', 'numberOfEvents', 'runNumbers',
        'lumiBlockNumbers', 'processingTags' and 'itemList'; 'itemList'
        holds (class name, key) tuples
    """
    result = {
        'eventTypes': [_convert_value(event_type) for event_type in value.m_eventTypes],
        'numberOfEvents': value.m_numberOfEvents,
        'runNumbers': list(value.m_runNumbers),
        'lumiBlockNumbers': list(value.m_lumiBlockNumbers),
        'processingTags': [str(tag) for tag in value.m_processingTags],
        'itemList': [],
    }

    # Get the class name in the repository with CLID <clid>
    from CLIDComps.clidGenerator import clidGenerator
    clid_db = clidGenerator("")
    for clid, sgkey in value.m_itemList:
        # keys may arrive as bytes; store them as text
        key = sgkey.decode() if isinstance(sgkey, bytes) else sgkey
        result['itemList'].append((clid_db.getNameFromClid(clid), key))

    return result
1236
1237

◆ _extract_fields_fmd()

python.MetaReader._extract_fields_fmd ( interface = None,
aux = None )
protected
Turn static FileMetaData content into dictionary

This function takes the FileMetaData_v1 and FileMetaDataAuxInfo_v1 objects.
It makes sure the interface object uses the auxiliary object as store.
Next the two static variables of FileMetaDataAuxInfo_v1 are retrieved and
added to the dictionary that is returned.
    Args:
        interface (FileMetaData_v1):        the interface class
        aux       (FileMetaDataAuxInfo_v1): auxiliary container object
    Returns
        dict: with the production release and dataType

Definition at line 1278 of file MetaReader.py.

def _extract_fields_fmd(interface=None, aux=None):
    """Turn static FileMetaData content into dictionary

    This function takes the FileMetaData_v1 and FileMetaDataAuxInfo_v1
    objects. It makes sure the interface object uses the auxiliary object
    as store. The production release, data type, run numbers and lumi
    blocks are then requested from the interface and returned as Python
    objects.
        Args:
            interface (FileMetaData_v1):        the interface class
            aux       (FileMetaDataAuxInfo_v1): auxiliary container object
        Returns
            dict: 'productionRelease' and 'dataType' as str, 'runNumbers'
            and 'lumiBlocks' as list; empty when either argument is
            missing
    """
    import ROOT
    if not (interface and aux):
        return {}
    interface.setStore(aux)

    # C++ objects that interface.value() fills in place
    holders = {
        "productionRelease": ROOT.std.string(),
        "dataType": ROOT.std.string(),
        "runNumbers": ROOT.std.vector('unsigned int')(),
        "lumiBlocks": ROOT.std.vector('unsigned int')(),
    }
    # Note: using this for dynamic attributes returns empty content
    for name, holder in holders.items():
        try:
            # use the attribute of that name on the interface when present
            interface.value(getattr(interface, name), holder)
        except AttributeError:
            interface.value(name, holder)

    # Now return python objects
    result = {}
    for name, holder in holders.items():
        if type(holder) is ROOT.std.string:
            result[name] = str(holder)
        elif type(holder) is ROOT.std.vector('unsigned int'):
            result[name] = list(holder)
    return result
1311
1312

◆ _extract_fields_iov()

python.MetaReader._extract_fields_iov ( iov_container,
idx_range )
protected

Definition at line 1154 of file MetaReader.py.

1154def _extract_fields_iov( iov_container, idx_range ):
1155 result = {}
1156
1157 for idx in idx_range:
1158 attr_idx = iov_container.m_attrIndexes[idx]
1159 name_idx = attr_idx.nameIndex()
1160 attr_name = iov_container.m_attrName[name_idx]
1161 attr_value = _get_attribute_val(iov_container, attr_name, attr_idx)
1162
1163 if attr_name not in result:
1164 result[attr_name] = [attr_value]
1165 else:
1166 result[attr_name].append(attr_value)
1167
1168 max_element_count = 0
1169 for content in result.values():
1170 if len(content) > max_element_count:
1171 max_element_count = len(content)
1172
1173 if max_element_count <= 1:
1174 for name, content in result.items():
1175 if len(content) > 0:
1176 result[name] = content[0]
1177 else:
1178 result[name] = None
1179
1180 return result
1181
1182

◆ _extract_fields_iovmdc()

python.MetaReader._extract_fields_iovmdc ( value)
protected

Definition at line 1210 of file MetaReader.py.

def _extract_fields_iovmdc(value):
    """Convert an IOV metadata container by converting its ``m_payload``."""
    payload = value.m_payload
    return _convert_value(payload)
1212
1213

◆ _extract_fields_tmd()

python.MetaReader._extract_fields_tmd ( interface = None,
aux = None )
protected

Definition at line 1313 of file MetaReader.py.

def _extract_fields_tmd(interface=None, aux=None):
    """Extract TruthMetaData content into dictionary

    This function takes the TruthMetaDataContainer_v1 and
    TruthMetaDataAuxContainer_v1 objects. It makes sure the interface
    object uses the auxiliary object as store.
        Args:
            interface (TruthMetaDataContainer_v1):    the interface class
            aux       (TruthMetaDataAuxContainer_v1): auxiliary container object
        Returns
            dict: 'mcChannelNumber', 'weightNames' (list) and the string
            fields 'lhefGenerator', 'generators', 'evgenProcess',
            'evgenTune', 'hardPDF', 'softPDF'. A field whose accessor
            raises SG::ExcBadAuxVar is set to an empty list / empty
            string. Empty dict when either argument is missing.
    """
    import ROOT
    BadAuxVarException = ROOT.SG.ExcBadAuxVar

    if not interface or not aux:
        return {}
    interface.setStore(aux)

    # accessors whose result is stored as str, '' when unavailable
    string_fields = (
        'lhefGenerator',
        'generators',
        'evgenProcess',
        'evgenTune',
        'hardPDF',
        'softPDF',
    )

    # Only one entry is really expected; should there be several, each
    # one overwrites the values of the previous, so the last entry wins.
    result = {}
    for tmd in interface:
        result['mcChannelNumber'] = tmd.mcChannelNumber()

        try:
            result['weightNames'] = list(tmd.weightNames())
        except BadAuxVarException:
            result['weightNames'] = []

        for field in string_fields:
            try:
                result[field] = str(getattr(tmd, field)())
            except BadAuxVarException:
                result[field] = ''

    return result
1371
1372
1373""" Note: Deprecated. Legacy support for Run 2 AODs produced in release 21 or in release 22 prior to April 2021
1374"""

◆ _extract_fields_triggermenu()

python.MetaReader._extract_fields_triggermenu ( interface,
aux )
protected

Definition at line 1375 of file MetaReader.py.

1375def _extract_fields_triggermenu(interface, aux):
1376 if aux is None:
1377 return {}
1378
1379 L1Items = []
1380 HLTChains = []
1381
1382 try:
1383 interface.setStore( aux )
1384 if interface.size() > 0:
1385 # We make the assumption that the first stored SMK is
1386 # representative of all events in the input collection.
1387 firstMenu = interface.at(0)
1388 L1Items = [ _convert_value(item) for item in firstMenu.itemNames() ]
1389 HLTChains = [ _convert_value(chain) for chain in firstMenu.chainNames() ]
1390 except Exception as err: # noqa: F841
1391 msg.warn('Problem reading xAOD::TriggerMenu:')
1392
1393 result = {}
1394 result['L1Items'] = L1Items
1395 result['HLTChains'] = HLTChains
1396
1397 return result
1398

◆ _extract_fields_triggermenujson()

python.MetaReader._extract_fields_triggermenujson ( interface,
aux )
protected

Definition at line 1399 of file MetaReader.py.

1399def _extract_fields_triggermenujson(interface, aux):
1400 result = {}
1401
1402 try:
1403 interface.setStore( aux )
1404 if interface.size() > 0:
1405 # We make the assumption that the first stored SMK is
1406 # representative of all events in the input collection.
1407 firstMenu = interface.at(0)
1408 import json
1409 decoded = json.loads(firstMenu.payload())
1410 result['RAWTriggerMenuJson'] = firstMenu.payload()
1411 result['name'] = firstMenu.name()
1412 result['dbkey'] = firstMenu.key()
1413 if decoded['filetype'] == 'hltmenu':
1414 result['HLTChains'] = [ _convert_value(chain) for chain in decoded['chains'] ]
1415 elif decoded['filetype'] == 'l1menu':
1416 result['L1Items'] = [ _convert_value(item) for item in decoded['items'] ]
1417 elif decoded['filetype'] in ['bunchgroupset', 'hltprescale', 'l1prescale', 'hltmonitoringsummary']:
1418 return result
1419
1420 else:
1421 msg.warn('Got an xAOD::TriggerMenuJson called {0} but only expecting hltmenu or l1menu'.format(decoded['filetype']))
1422 return {}
1423
1424 except Exception as err: # noqa: F841
1425 msg.warn('Problem reading xAOD::TriggerMenuJson')
1426
1427 return result
1428

◆ _extract_iov_detailed()

python.MetaReader._extract_iov_detailed ( iov_container)
protected

Definition at line 1183 of file MetaReader.py.

1183def _extract_iov_detailed(iov_container):
1184 def iovtostr(t):
1185 # break iov time into high and low halves (run number usually in the higher half)
1186 return "({h}:{l})".format(h=t>>32, l=t&(2^32-1))
1187
1188 def extract_list_collection(iov_container, listCollection ):
1189 result = {}
1190 ln = 0
1191 for list in listCollection.m_attrLists:
1192 ln = ln + 1
1193 lname = 'List {ln}: iov=[{s} ,{e}]; Channel#={ch}'.format(
1194 ln=ln, s=iovtostr(list.m_range.m_start),
1195 e=iovtostr(list.m_range.m_stop),
1196 ch=list.m_channelNumber )
1197 result[ lname ] = _extract_fields_iov( iov_container, range(list.m_firstIndex, list.m_lastIndex) )
1198 return result
1199
1200 result = {}
1201 pn = 0
1202 for listCollection in iov_container.m_payloadVec:
1203 pn = pn + 1
1204 pname = 'IOV range {n}: [{s}, {e}]'.format(n=pn, s=iovtostr(listCollection.m_start),
1205 e=iovtostr(listCollection.m_stop))
1206 result[ pname ] = extract_list_collection(iov_container, listCollection )
1207 return result
1208
1209

◆ _get_attribute_val()

python.MetaReader._get_attribute_val ( iov_container,
attr_name,
attr_idx )
protected

Definition at line 1101 of file MetaReader.py.

1101def _get_attribute_val(iov_container, attr_name, attr_idx):
1102 type_idx = attr_idx.typeIndex()
1103 obj_idx = attr_idx.objIndex()
1104
1105 attr_value = None
1106
1107 if type_idx == 0:
1108 attr_value = bool(iov_container.m_bool[obj_idx])
1109 elif type_idx == 1:
1110 attr_value = int(iov_container.m_char[obj_idx])
1111 elif type_idx == 2:
1112 attr_value = int(iov_container.m_unsignedChar[obj_idx])
1113 elif type_idx == 3:
1114 attr_value = int(iov_container.m_short[obj_idx])
1115 elif type_idx == 4:
1116 attr_value = int(iov_container.m_unsignedShort[obj_idx])
1117 elif type_idx == 5:
1118 attr_value = int(iov_container.m_int[obj_idx])
1119 elif type_idx == 6:
1120 attr_value = int(iov_container.m_unsignedInt[obj_idx])
1121 elif type_idx == 7:
1122 attr_value = int(iov_container.m_long[obj_idx])
1123 elif type_idx == 8:
1124 attr_value = int(iov_container.m_unsignedLong[obj_idx])
1125 elif type_idx == 9:
1126 attr_value = int(iov_container.m_longLong[obj_idx])
1127 elif type_idx == 10:
1128 attr_value = int(iov_container.m_unsignedLongLong[obj_idx])
1129 elif type_idx == 11:
1130 attr_value = float(iov_container.m_float[obj_idx])
1131 elif type_idx == 12:
1132 attr_value = float(iov_container.m_double[obj_idx])
1133 elif type_idx == 13:
1134 # skipping this type because is file IOVPayloadContainer_p1.h (line 120) is commented and not considered
1135 pass
1136 elif type_idx == 14:
1137 attr_value = str(iov_container.m_string[obj_idx])
1138 # Cleaning class name from value
1139 if attr_value.startswith('IOVMetaDataContainer_p1_'):
1140 attr_value = attr_value.replace('IOVMetaDataContainer_p1_', '')
1141 if attr_value.startswith('_'):
1142 attr_value = attr_value.replace('_', '/')
1143 # Now it is clean
1144 elif type_idx == 15:
1145 attr_value = int(iov_container.m_date[obj_idx])
1146 elif type_idx == 16:
1147 attr_value = int(iov_container.m_timeStamp[obj_idx])
1148 else:
1149 raise ValueError('Unknown type id {0} for attribute {1}'.format(type_idx, attr_name))
1150
1151 return attr_value
1152
1153

◆ _get_pfn()

python.MetaReader._get_pfn ( filename)
protected
Extract the actual filename if LFN or PFN notation is used

Definition at line 942 of file MetaReader.py.

942def _get_pfn(filename):
943 """
944 Extract the actual filename if LFN or PFN notation is used
945 """
946 pfx = filename[0:4]
947 if pfx == 'PFN:':
948 return filename[4:]
949 if pfx == 'LFN:':
950 import subprocess, os
951 os.environ['POOL_OUTMSG_LEVEL'] = 'Error'
952 output = subprocess.check_output(['FClistPFN','-l',filename[4:]],text=True).split('\n')
953 if len(output) == 2:
954 return output[0]
955 msg.error( 'FClistPFN({0}) returned unexpected number of lines:'.format(filename) )
956 msg.error( '\n'.join(output) )
957 return filename
958
959

◆ _read_guid()

python.MetaReader._read_guid ( filename)
protected
Extracts the "guid" (Globally Unique Identifier in POOL files and Grid catalogs) value from a POOL file.
:param filename: the input file
:return: the guid value, None if unavailable

Definition at line 960 of file MetaReader.py.

def _read_guid(filename):
    """
    Extracts the "guid" (Globally Unique Identifier in POOL files and Grid catalogs) value from a POOL file.

    The '##Params' object of the file is read as a TTree, an RNTuple or
    a TDirectory, and its strings of the form '[NAME=...][VALUE=...]'
    are searched for the name 'FID'. The file opened here is closed
    again before returning.

    :param filename: the input file
    :return: the guid value, None if unavailable
    :raises NotImplementedError: if '##Params' is of an unsupported type
    """
    import ROOT
    try:
        from ROOT import RNTuple as rnt
    except ImportError:
        from ROOT.Experimental import RNTuple as rnt

    regex = re.compile(r'\[NAME=(\w+)\]\[VALUE=(.*)\]', re.ASCII)

    def fid_of(db_string, current):
        # Return the FID encoded in db_string, or current if it holds none.
        result = regex.match(db_string)
        if result and result.group(1) == 'FID':
            return result.group(2)
        return current

    root_file = ROOT.TFile.Open( _get_pfn(filename) )
    try:
        params = root_file.Get('##Params')
        if not params:
            return None
        if not isinstance(params, ROOT.TTree) and not isinstance(params, rnt) and not isinstance(params, ROOT.TDirectory):
            raise NotImplementedError(f"Cannot extract GUID from object {params!r} of type {type(params)!r}")

        # All entries are scanned: it's the last FID entry that counts.
        fid = None

        if isinstance(params, ROOT.TTree):
            for entry in params:
                fid = fid_of(entry.GetLeaf('db_string').GetValueString(), fid)
        elif isinstance(params, rnt):
            try:
                from ROOT import RNTupleReader
            except ImportError:
                from ROOT.Experimental import RNTupleReader
            reader = RNTupleReader.Open(params)
            try:
                entry = reader.CreateEntry()
            except AttributeError:
                entry = reader.GetModel().CreateEntry()
            for idx in range(reader.GetNEntries()):
                reader.LoadEntry(idx, entry)
                try:
                    fid = fid_of(str(entry['db_string']), fid)
                except (AttributeError, TypeError) as err:
                    # Early RNTuple implementation doesn't allow reading
                    # strings on the python side, might be triggering it...
                    msg.error(f"Cannot read FID from ##Params in RNTuple w/ ROOT error: {err}")
                    return None
        elif isinstance(params, ROOT.TDirectory):
            for key in params.GetListOfKeys():
                fid = fid_of(str(params.Get(key.GetName())), fid)

        return fid
    finally:
        # Only plain Python strings leave this function, so the file can
        # be released on every exit path.
        if root_file:
            root_file.Close()
1019
1020

◆ convert_itemList()

python.MetaReader.convert_itemList ( metadata,
layout )
This function will rearrange the itemList values to match the format of 'eventdata_items', 'eventdata_itemsList'
or 'eventdata_itemsDic' generated with the legacy file peeker tool
:param metadata: a dictionary obtained using read_metadata method.
                 The mode for read_metadata must be 'peeker' or 'full'
:param layout: the mode in which the data will be converted:
            * for 'eventdata_items' use: layout= None
            * for 'eventdata_itemsList' use: layout= '#join'
            * for 'eventdata_itemsDic' use: layout= 'dict'

Definition at line 1696 of file MetaReader.py.

def convert_itemList(metadata, layout):
    """
    This function will rearrange the itemList values to match the format of 'eventdata_items', 'eventdata_itemsList'
    or 'eventdata_itemsDic' generated with the legacy file peeker tool

    The item list is taken from metadata['itemList'] if present, otherwise from the first metadata entry that
    'metadata_items' lists as 'EventStreamInfo_p3'. None is returned when no item list is found or the layout
    is not one of those below.

    :param metadata: a dictionary obtained using read_metadata method.
                     The mode for read_metadata must be 'peeker' or 'full'
    :param layout: the mode in which the data will be converted:
                * for 'eventdata_items' use: layout= None
                * for 'eventdata_itemsList' use: layout= '#join'
                * for 'eventdata_itemsDic' use: layout= 'dict'
    """
    # Find the itemsList:
    item_list = None

    if 'itemList' in metadata:
        item_list = metadata['itemList']
    elif 'metadata_items' in metadata:
        type_of = metadata['metadata_items']
        stream_key = next(
            (key for key in metadata
             if key in type_of and type_of[key] == 'EventStreamInfo_p3'),
            None,
        )
        if stream_key is not None:
            item_list = metadata[stream_key]['itemList']

    if item_list is None:
        return None

    if layout is None:
        return item_list

    if layout == '#join':
        # entries with an empty class name are left out
        return [k + '#' + v for k, v in item_list if k]

    if layout == 'dict':
        from collections import defaultdict
        by_class = defaultdict(list)
        for k, v in item_list:
            by_class[k].append(v)
        return dict(by_class)

    return None
1741
1742

◆ dataheader_nentries()

python.MetaReader.dataheader_nentries ( infile)
Extract number of entries from DataHeader.

infile  ROOT TFile object or filename string
return  Number of entries as returned by DataHeader object in infile,
        None in absence of DataHeader object

Definition at line 1743 of file MetaReader.py.

def dataheader_nentries(infile):
    """Extract number of entries from DataHeader.

    infile  ROOT TFile object or filename string; a file opened here
            from a filename is closed again before returning, a TFile
            passed in by the caller is left open
    return  Number of entries as returned by DataHeader object in infile,
            None in absence of DataHeader object
    raises  RuntimeError if a non-TTree DataHeader is found with a ROOT
            version older than 6.31/01, NotImplementedError if the
            DataHeader object is neither a TTree nor an RNTuple
    """
    import ROOT
    from PyUtils.PoolFile import PoolOpts

    opened_here = not isinstance(infile, ROOT.TFile)
    if opened_here:
        infile = ROOT.TFile.Open(infile)

    try:
        # dict.fromkeys drops a duplicate name while keeping a fixed
        # probing order (TTree name first), unlike a set literal.
        names = dict.fromkeys((PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader))
        for name in names:
            obj = infile.Get(name)
            msg.debug(f"dataheader_nentries: {name=}, {obj=}, {type(obj)=}")
            if not obj:
                continue
            if isinstance(obj, ROOT.TTree):
                return obj.GetEntriesFast()
            # check early to avoid scary ROOT read errors
            if ROOT.gROOT.GetVersionInt() < 63100:
                raise RuntimeError("ROOT ver. 6.31/01 or greater needed to read RNTuple files")
            if isRNTuple(obj):
                try:
                    return ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
                except AttributeError:
                    return ROOT.RNTupleReader.Open(obj).GetNEntries()
            raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
        return None
    finally:
        # Only an integer (or None) leaves this function, so a file we
        # opened ourselves can be released on every exit path.
        if opened_here and infile:
            infile.Close()
1773

◆ denormalize_metadata_types()

python.MetaReader.denormalize_metadata_types ( metadata_dict)
Convert canonical/C++ STL types in the metadata_items dictionary back to their
ROOT equivalents for backward compatibility.
- 'float' => 'Float_t'
- 'char' => 'Char_t'
- 'std::string' => 'string'
- 'xAOD::FileMetaData_v1' => 'FileMetaData'
- 'xAOD::FileMetaDataAuxInfo_v1' => 'FileMetaDataAux'
(add more as needed)

Definition at line 1849 of file MetaReader.py.

def denormalize_metadata_types(metadata_dict):
    """
    Convert canonical/C++ STL types in the metadata_items dictionary back to their
    ROOT equivalents for backward compatibility.
    - 'float' => 'Float_t'
    - 'char' => 'Char_t'
    - 'std::string' => 'string'
    - 'std::uint32_t' => 'UInt_t'
    - 'xAOD::FileMetaData_v1' => 'FileMetaData'
    (add more as needed)

    A type name is rewritten when it equals a canonical name or ends with
    '.' followed by a canonical name; in the latter case only that suffix
    is replaced. Returns a new dictionary; the input is not modified.
    """
    type_map = {
        "float": "Float_t",
        "char": "Char_t",
        "std::string": "string",
        "std::uint32_t": "UInt_t",
        "xAOD::FileMetaData_v1": "FileMetaData",
    }

    def to_legacy(type_name):
        # Mappings are applied one after the other, in type_map order.
        for canonical, legacy in type_map.items():
            if type_name == canonical:
                type_name = legacy
            elif type_name.endswith("." + canonical):
                # canonical names contain no '.', so cutting the suffix
                # keeps everything up to and including the last '.'
                type_name = type_name[:-len(canonical)] + legacy
        return type_name

    return {key: to_legacy(type_name) for key, type_name in metadata_dict.items()}
1877
1878

◆ get_meta_filter()

dict python.MetaReader.get_meta_filter ( mode = "lite",
meta_key_filter = None )
Return a dictionary of metadata filters based on the mode and
optional meta_key_filter.

Definition at line 1774 of file MetaReader.py.

1774def get_meta_filter(mode="lite", meta_key_filter=None) -> dict:
1775 """Return a dictionary of metadata filters based on the mode and
1776 optional meta_key_filter.
1777 """
1778
1779 if meta_key_filter is None:
1780 meta_key_filter = []
1781
1782 # create a container for the list of filters used for the lite version
1783 meta_filter = {}
1784
1785 # set the filters for name
1786 if mode == 'lite':
1787 if isGaudiEnv():
1788 meta_filter = {
1789 '/TagInfo': 'IOVMetaDataContainer_p1',
1790 'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
1791 '*': 'EventStreamInfo_p*'
1792 }
1793 else:
1794 meta_filter = {
1795 'FileMetaData': '*',
1796 'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
1797 }
1798
1799 # set the filters for name
1800 if mode == 'peeker':
1801 meta_filter.update({
1802 'TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>', # R2 trigger metadata format AOD (deprecated)
1803 'TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
1804 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu': 'DataVector<xAOD::TriggerMenu_v1>', # R2 trigger metadata format ESD (deprecated)
1805 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.': 'xAOD::TriggerMenuAuxContainer_v1',
1806 'TriggerMenuJson_HLT': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1807 'TriggerMenuJson_HLTAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1808 'TriggerMenuJson_HLTMonitoring': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1809 'TriggerMenuJson_HLTMonitoringAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1810 'TriggerMenuJson_HLTPS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1811 'TriggerMenuJson_HLTPSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1812 'TriggerMenuJson_L1': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1813 'TriggerMenuJson_L1Aux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1814 'TriggerMenuJson_L1PS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format AOD
1815 'TriggerMenuJson_L1PSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1816 'CutBookkeepers': 'xAOD::CutBookkeeperContainer_v1',
1817 'CutBookkeepersAux.': 'xAOD::CutBookkeeperAuxContainer_v1',
1818 'FileMetaData': '*',
1819 'FileMetaDataAux.': 'xAOD::FileMetaDataAuxInfo_v1',
1820 'TruthMetaData': '*',
1821 'TruthMetaDataAux.': 'xAOD::TruthMetaDataAuxContainer_v1',
1822 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1823 'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1824 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1825 'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTMonitoringAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1826 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1827 'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTPSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1828 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1829 'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1Aux.': 'xAOD::TriggerMenuJsonAuxContainer_v1',
1830 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS': 'DataVector<xAOD::TriggerMenuJson_v1>', # R3 trigger metadata format ESD
1831 'xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1PSAux.': 'xAOD::TriggerMenuJsonAuxContainer_v1'
1832 })
1833
1834 if isGaudiEnv():
1835 meta_filter.update({
1836 '/TagInfo': 'IOVMetaDataContainer_p1',
1837 'IOVMetaDataContainer_p1__TagInfo': 'IOVMetaDataContainer_p1',
1838 '/Simulation/Parameters': 'IOVMetaDataContainer_p1',
1839 '/Digitization/Parameters': 'IOVMetaDataContainer_p1',
1840 '/EXT/DCS/MAGNETS/SENSORDATA': 'IOVMetaDataContainer_p1',
1841 '*': 'EventStreamInfo_p*'
1842 })
1843
1844 if (mode == 'full' or mode == 'iov') and meta_key_filter:
1845 meta_filter = {f: '*' for f in meta_key_filter}
1846
1847 return meta_filter
1848

◆ make_lite()

python.MetaReader.make_lite ( meta_dict)

Definition at line 1468 of file MetaReader.py.

def make_lite(meta_dict):
    """Prune every file's metadata containers to the module's 'lite' allow-lists.

    For each file entry, every container whose type recorded in
    ``metadata_items`` matches ``regexEventStreamInfo`` keeps only the keys
    listed in ``lite_primary_keys_to_keep``; the ``/TagInfo`` container, when
    present, keeps only the keys listed in ``lite_TagInfo_keys_to_keep``.
    The nested dictionaries are edited in place.

    :param meta_dict: mapping of file name to that file's metadata dictionary.
    :return: the same ``meta_dict`` object.
    """
    for file_content in meta_dict.values():
        for container_name in file_content:
            item_types = file_content['metadata_items']
            if container_name not in item_types:
                continue
            if not regexEventStreamInfo.match(item_types[container_name]):
                continue

            container = file_content[container_name]
            # collect the names first: the container shrinks while we drop them
            unwanted = [name for name in container if name not in lite_primary_keys_to_keep]
            for name in unwanted:
                container.pop(name)

        if '/TagInfo' in file_content:
            tag_info = file_content['/TagInfo']
            unwanted = [name for name in tag_info if name not in lite_TagInfo_keys_to_keep]
            for name in unwanted:
                tag_info.pop(name)

    return meta_dict
1483
1484

◆ make_peeker()

python.MetaReader.make_peeker ( meta_dict)

Definition at line 1485 of file MetaReader.py.

def make_peeker(meta_dict):
    """Reduce every file's metadata containers to fixed allow-lists, in place.

    For each file entry in ``meta_dict``:

    * every container whose type recorded in ``metadata_items`` matches
      ``regexEventStreamInfo`` keeps only the event-stream keys listed below;
    * the ``/TagInfo``, ``/Simulation/Parameters``, ``/Digitization/Parameters``,
      ``CutBookkeepers`` and ``TruthMetaData`` containers, when present, keep
      only their own allow-listed keys.

    :param meta_dict: mapping of file name to that file's metadata dictionary.
        A non-empty file dictionary must contain a ``metadata_items`` mapping
        (container name -> type name).
    :return: the same ``meta_dict`` object, modified in place.
    :raises KeyError: if a non-empty file dictionary has no ``metadata_items``.
    """
    event_stream_info_keys = (
        'lumiBlockNumbers',
        'runNumbers',
        'mc_event_number',
        'mc_channel_number',
        'eventTypes',
        'processingTags',
        'itemList',
    )

    # container name -> keys that survive in that container
    container_keys = {
        '/TagInfo': (
            'beam_energy',
            'beam_type',
            'GeoAtlas',
            'IOVDbGlobalTag',
            'AODFixVersion',
            'AMITag',
            'project_name',
            'triggerStreamOfFile',
            'AtlasRelease',
            'specialConfiguration',
            'mc_campaign',
            'hepmc_version',
            'generators',
            'keywords',
            'data_year',
        ),
        '/Simulation/Parameters': (
            'G4Version',
            'TruthStrategy',
            'SimBarcodeOffset',
            'RegenerationIncrement',
            'TRTRangeCut',
            'SimulationFlavour',
            'Simulator',
            'PhysicsList',
            'SimulatedDetectors',
            'IsDataOverlay',
        ),
        '/Digitization/Parameters': (
            'numberOfCollisions',
            'intraTrainBunchSpacing',
            # every entry needs its own trailing comma: adjacent string
            # literals are silently concatenated into a single key
            'BeamIntensityPattern',
            'physicsList',
            'digiSteeringConf',
            'pileUp',
            'DigitizedDetectors',
        ),
        'CutBookkeepers': (
            'currentCutCycle',
            'currentCutInputStream',
        ),
        'TruthMetaData': (
            'mcChannelNumber',
            'weightNames',
        ),
    }

    def _keep_only(container, allowed):
        # snapshot the keys first: entries are removed while walking them
        for item in list(container):
            if item not in allowed:
                container.pop(item)

    for file_content in meta_dict.values():
        for key in file_content:
            item_types = file_content['metadata_items']
            if key in item_types and regexEventStreamInfo.match(item_types[key]):
                _keep_only(file_content[key], event_stream_info_keys)

        for name, allowed in container_keys.items():
            if name in file_content:
                _keep_only(file_content[name], allowed)

    return meta_dict
1574
1575

◆ promote_keys()

python.MetaReader.promote_keys ( meta_dict,
mode )

Definition at line 1576 of file MetaReader.py.

# promote_keys(meta_dict, mode): for every file entry, copy selected values out of
# nested metadata containers to the top level of that file's dictionary, then
# return the (mutated) meta_dict.
#   meta_dict: mapping of file name -> that file's metadata dictionary.
#   mode: compared against 'peeker' and 'lite' below to decide which extra keys
#         are promoted and whether the FileMetaData container is removed.
# NOTE(review): this listing has lost its indentation, so the nesting of the
# statements below (in particular of the two `break`s) cannot be read off here;
# check against the real file before restructuring anything.
1576def promote_keys(meta_dict, mode):
1577 for filename, file_content in meta_dict.items():
# md refers to the same dictionary object as file_content, so writing to md
# changes the dictionary that the `for key in file_content` loop is walking
1578 md = meta_dict[filename]
1579 for key in file_content:
# container whose recorded type matches regexEventStreamInfo:
# merge all of its entries into the top-level dictionary
1580 if key in md['metadata_items'] and regexEventStreamInfo.match(md['metadata_items'][key]):
1581 md.update(md[key])
1582
1583 if 'eventTypes' in md and len(md['eventTypes']):
1584 et = md['eventTypes'][0]
# the fallback md['runNumbers'][0] is evaluated even when et already
# contains 'mc_event_number', so md['runNumbers'] must be non-empty here
1585 md['mc_event_number'] = et.get('mc_event_number', md['runNumbers'][0])
1586 if 'mc_channel_number' in et:
1587 md['mc_channel_number'] = et.get('mc_channel_number', None)
# the list of event types is replaced by the 'type' field of its first entry
1588 md['eventTypes'] = et['type']
1589
1590 # For very old files
1591 if 'GeoAtlas' in et:
1592 md['GeoAtlas'] = et.get('GeoAtlas', None)
1593 if 'IOVDbGlobalTag' in et:
1594 md['IOVDbGlobalTag'] = et.get('IOVDbGlobalTag', None)
1595
1596 if 'lumiBlockNumbers' in md[key]:
1597 md['lumiBlockNumbers'] = md[key]['lumiBlockNumbers']
1598
1599 if 'processingTags' in md[key]:
1600 md['processingTags'] = md[key]['processingTags']
1601
# the nested container is removed once its content has been promoted
1602 meta_dict[filename].pop(key)
1603 break
1604
# only when isGaudiEnv() is false: promote fields of a metadata item whose
# key contains 'FileMetaData', renaming them to the top-level key names
1605 if not isGaudiEnv() and key in md['metadata_items'] and 'FileMetaData' in key:
1606 if 'beamType' in md[key]:
1607 md['beam_type'] = md[key]['beamType']
1608
1609 if 'runNumbers' in md[key]:
1610 md['runNumbers'] = md[key]['runNumbers']
1611
1612 if 'mcProcID' in md[key]:
1613 md['mc_channel_number'] = int(md[key]['mcProcID'])
1614
1615 if 'mcCampaign' in md[key]:
1616 md['mc_campaign'] = md[key]['mcCampaign']
1617
1618 if 'dataYear' in md[key]:
1619 md['data_year'] = int(md[key]['dataYear'])
1620
1621 if 'lumiBlocks' in md[key]:
1622 md['lumiBlockNumbers'] = md[key]['lumiBlocks']
1623
1624 if mode == 'peeker' and 'amiTag' in md[key]:
1625 md['AMITag'] = md[key]['amiTag']
1626
1627 if 'beamEnergy' in md[key]:
1628 md['beam_energy'] = int(md[key]['beamEnergy'])
1629
1630 if 'geometryVersion' in md[key]:
1631 md['GeoAtlas'] = md[key]['geometryVersion']
1632
1633 # EventType checks
# eventTypes is rebuilt from scratch as a list of flag strings
1634 md['eventTypes'] = []
1635 if mode == 'peeker' and 'simFlavour' in md[key]:
1636 md['SimulationFlavour'] = md[key]['simFlavour']
1637
1638 if mode == 'peeker' and 'isDataOverlay' in md[key]:
1639 md['IsDataOverlay'] = md[key]['isDataOverlay']
1640
1641 if 'dataType' in md[key]:
1642 md['processingTags'] = [md[key]['dataType']]
1643
# NOTE(review): md[key]['dataType'] is read in the next two conditions without
# the presence check used just above; if 'dataType' is absent and the first
# operand of the `or` is false this would raise KeyError — confirm it is always set
1644 if (
1645 ('simFlavour' in md[key] and ('FullG4' in md[key]['simFlavour'] or 'ATLFAST' in md[key]['simFlavour']))
1646 or 'DAOD_TRUTH' in md[key]['dataType']
1647 ):
1648 md['eventTypes'].append('IS_SIMULATION')
1649 else:
1650 md['eventTypes'].append('IS_DATA')
1651
1652 if (
1653 'GeoAtlas' in md and 'ATLAS' in md['GeoAtlas']
1654 or 'DAOD_TRUTH' in md[key]['dataType']
1655 ):
1656 md['eventTypes'].append('IS_ATLAS')
1657 # this is probably safe to assume for all files used in AnalysisBase
1658 md['eventTypes'].append('IS_PHYSICS')
1659 else:
1660 md['eventTypes'].append('IS_TESTBEAM')
1661
1662 if mode == 'peeker':
1663 if 'productionRelease' in md[key]:
1664 md['AtlasRelease'] = md[key]['productionRelease']
1665
1666 if 'generatorsInfo' in md[key]:
1667 md['generators'] = md[key]['generatorsInfo']
1668
# in 'lite' mode the FileMetaData container itself is dropped after promotion
1669 if mode == 'lite':
1670 meta_dict[filename].pop(key)
1671 break
1672
# each of the following containers, when present, is merged into the
# top-level dictionary and its nested copy removed
1673 if '/TagInfo' in file_content:
1674 md.update(md['/TagInfo'])
1675 md.pop('/TagInfo')
1676
1677 if '/Generation/Parameters' in file_content:
1678 md.update(md['/Generation/Parameters'])
1679 md.pop('/Generation/Parameters')
1680
1681 if '/Simulation/Parameters' in file_content:
1682 md.update(md['/Simulation/Parameters'])
1683 md.pop('/Simulation/Parameters')
1684
1685 if '/Digitization/Parameters' in file_content:
1686 md.update(md['/Digitization/Parameters'])
1687 md.pop('/Digitization/Parameters')
1688
1689 if 'CutBookkeepers' in file_content:
1690 md.update(md['CutBookkeepers'])
1691 md.pop('CutBookkeepers')
1692
1693 return meta_dict
1694
1695

◆ read_metadata()

python.MetaReader.read_metadata ( filenames,
file_type = None,
mode = 'lite',
promote = None,
meta_key_filter = None,
unique_tag_info_values = True,
ignoreNonExistingLocalFiles = False )
This tool is independent of Athena framework and returns the metadata from a given file.
:param filenames: the input file name, or a list of input file names, from which metadata needs to be extracted.
:param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files.
:param mode: the level of detail to read; one of 'tiny', 'lite' (default), 'peeker', 'iov' or 'full'. The 'tiny' mode
returns a minimal version which has only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
:return: a dictionary of metadata for the given input file.

Definition at line 70 of file MetaReader.py.

71 unique_tag_info_values = True, ignoreNonExistingLocalFiles=False):
72 """
73 This tool is independent of Athena framework and returns the metadata from a given file.
74 :param filenames: the input file from which metadata needs to be extracted.
75 :param file_type: the type of file. POOL or BS (bytestream: RAW, DRAW) files.
76 :param mode: if true, will return all metadata associated with the filename. By default, is false and this will
77 return a "tiny" version which have only the following keys: 'file_guid', 'file_size', 'file_type', 'nentries'.
78 :return: a dictionary of metadata for the given input file.
79 """
80
81 # make the mode available in the _convert methods
82 global _gbl_mode
83 _gbl_mode = mode
84
85 from RootUtils import PyROOTFixes # noqa F401
86
87 # Check if the input is a file or a list of files.
88 if isinstance(filenames, str):
89 filenames = [filenames]
90
91 # Check if file_type is an allowed value
92 if file_type is not None:
93 if file_type not in ('POOL', 'BS'):
94 raise NameError('Allowed values for \'file_type\' parameter are: "POOL" or "BS": you provided "' + file_type + '"')
95 else:
96 msg.info('Forced file_type: {0}'.format(file_type))
97
98 # Check the value of mode parameter
99 if mode not in ('tiny', 'lite', 'full', 'peeker', 'iov'):
100 raise NameError('Allowed values for "mode" parameter are: "tiny", "lite", "peeker", "iov" or "full"')
101
102 if meta_key_filter is None:
103 meta_key_filter = []
104
105 # Disable 'full' and 'iov' in non-Gaudi environments
106 if not isGaudiEnv():
107 if mode in ('full', 'iov'):
108 raise NameError('The following modes are not available in AnalysisBase: "iov" and "full"')
109
110 msg.info('Current mode used: {0}'.format(mode))
111 msg.info('Current filenames: {0}'.format(filenames))
112
113 if mode != 'full' and mode !='iov' and len(meta_key_filter) > 0:
114 raise NameError('It is possible to use the meta_key_filter option only for full mode')
115 if meta_key_filter:
116 msg.info('Filter used: {0}'.format(meta_key_filter))
117
118 # create the storage object for metadata.
119 meta_dict = {}
120
121 # ----- retrieve metadata from all filename or filenames --------------------------------------------------------#
122 for filename in filenames:
123 meta_dict[filename] = {}
124 current_file_type = None
125 # Determine the file_type of the input and store this information into meta_dict
126 if not file_type:
127 if os.path.isfile(filename):
128
129 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # Attention, bizarre convention of return value!!
130 msg.warn('Ignoring not accessible file: {}'.format(filename))
131 continue
132
133 with open(filename, 'rb') as binary_file:
134 magic_file = binary_file.read(4)
135
136 if magic_file == 'root' or magic_file == b'root':
137 current_file_type = 'POOL'
138 meta_dict[filename]['file_type'] = 'POOL'
139
140 elif Project.determine() in (
141 Project.AnalysisBase, Project.AthAnalysis):
142 raise RuntimeError(
143 f"{filename} is not a ROOT file, assumed bytestream"
144 ", this is not supported in Analysis releases")
145 else:
146 current_file_type = 'BS'
147 meta_dict[filename]['file_type'] = 'BS'
148
149 # add information about the file_size of the input filename
150 meta_dict[filename]['file_size'] = os.path.getsize(filename)
151
152 # determine the file type for the remote input files
153 else:
154 if regex_BS_files.match(filename):
155 current_file_type = 'BS'
156 meta_dict[filename]['file_type'] = 'BS'
157 else:
158 current_file_type = 'POOL'
159 meta_dict[filename]['file_type'] = 'POOL'
160
161 # add information about the file_size of the input filename
162 meta_dict[filename]['file_size'] = None # None -> we can't read the file size for a remote file
163
164 else:
165 current_file_type = file_type
166
167 # ----- retrieves metadata from POOL files ------------------------------------------------------------------#
168 if current_file_type == 'POOL':
169
170 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and gSystem.AccessPathName(filename): # Attention, bizarre convention of return value!!
171 msg.warn('Ignoring not accessible file: {}'.format(filename))
172 continue
173
174 import ROOT
175 # open the file using ROOT.TFile
176 current_file = ROOT.TFile.Open( _get_pfn(filename) )
177
178 # get auto flush setting from the main EventData TTree
179 from PyUtils.PoolFile import PoolOpts
180 collectionTree = current_file.Get(PoolOpts.TTreeNames.EventData)
181 if isinstance(collectionTree, ROOT.TTree):
182 meta_dict[filename]['auto_flush'] = collectionTree.GetAutoFlush()
183
184 # read and add the 'GUID' value
185 meta_dict[filename]['file_guid'] = _read_guid(filename)
186
187 # read and add compression level and algorithm
188 meta_dict[filename]['file_comp_alg'] = current_file.GetCompressionAlgorithm()
189 meta_dict[filename]['file_comp_level'] = current_file.GetCompressionLevel()
190
191 if (
192 isRNTuple(md:=current_file.Get(PoolOpts.RNTupleNames.MetaData))
193 and mode != "tiny"
194 ):
195 msg.warning(
196 "Reading in-file metadata from RNTuple is currently of limited support"
197 )
198 meta_dict[filename]["metadata_items"] = {}
199
200 try:
201 from ROOT import RNTupleReader
202 except ImportError:
203 from ROOT.Experimental import RNTupleReader
204
205 reader = RNTupleReader.Open(md)
206 entry = reader.CreateEntry()
207 reader.LoadEntry(0, entry)
208 auxes = {}
209 classes_with_aux = {
210 "xAOD::FileMetaData_v1",
211 "xAOD::FileMetaDataAuxInfo_v1",
212 "xAOD::TriggerMenuJsonAuxContainer_v1",
213 "DataVector<xAOD::TriggerMenuJson_v1>",
214 "xAOD::TruthMetaDataAuxContainer_v1",
215 "DataVector<xAOD::TruthMetaData_v1>",
216 "xAOD::CutBookkeeperContainer_v1",
217 "xAOD::CutBookkeeperAuxContainer_v1",
218 "xAOD::LumiBlockRangeAuxContainer_v1",
219 "DataVector<xAOD::LumiBlockRange_v1>",
220 }
221
222 dynamic_fmd_items = {}
223
224 meta_filter = get_meta_filter(mode, meta_key_filter)
225
226 for field in reader.GetDescriptor().GetTopLevelFields():
227 normalizedName = field.GetFieldName()
228 if "index_ref" in normalizedName:
229 continue
230 if regexIOVMetaDataContainer.match(field.GetTypeName()):
231 # if field name is e.g. IOVMetaDataContainer_p1__Digitization_Parameters,
232 # strip the prefix and change underscore to slash to slash
233 normalizedName = (
234 field.GetFieldName()
235 .replace("IOVMetaDataContainer_p1_", "")
236 .replace("_", "/")
237 )
238 meta_dict[filename]["metadata_items"][normalizedName] = (
239 "IOVMetaDataContainer"
240 )
241 elif regexByteStreamMetadataContainer.match(field.GetTypeName()):
242 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
243 "ByteStreamMetadataContainer"
244 )
245 elif regexEventStreamInfo.match(field.GetTypeName()):
246 meta_dict[filename]["metadata_items"][field.GetFieldName()] = (
247 "EventStreamInfo"
248 )
249 elif regexXAODFileMetaData.match(field.GetTypeName()):
250 meta_dict[filename]["metadata_items"][
251 field.GetFieldName().replace("xAOD__", "xAOD::")
252 ] = field.GetTypeName()
253 elif regexXAODFileMetaDataAuxDyn.match(
254 normalizedName := field.GetFieldName()
255 .replace("xAOD__", "xAOD::")
256 .replace("AuxDyn:", "AuxDyn.")
257 ):
258 result = (
259 False
260 if entry[field.GetFieldName()] == "\x00"
261 else entry[field.GetFieldName()]
262 )
263 dynamic_fmd_items[normalizedName.split(".")[1]] = result
264 meta_dict[filename]["metadata_items"][normalizedName] = (
265 field.GetTypeName()
266 )
267 continue
268 elif regexXAODFileMetaDataAux.match(field.GetTypeName()):
269 meta_dict[filename]["metadata_items"][
270 field.GetFieldName()
271 .replace("xAOD__", "xAOD::")
272 .replace("Aux:", "Aux.")
273 ] = field.GetTypeName()
274 elif regexXAODTruthMetaData.match(field.GetTypeName()):
275 meta_dict[filename]["metadata_items"][
276 field.GetFieldName()
277 .replace("xAOD__", "xAOD::")
278 .replace("DataVector_", "DataVector<")
279 .replace("__Truth", ">_Truth")
280 ] = "TruthMetaData"
281 elif regexXAODTruthMetaDataAux.match(field.GetTypeName()):
282 meta_dict[filename]["metadata_items"][
283 field.GetFieldName()
284 .replace("xAOD__", "xAOD::")
285 .replace("Aux:", "Aux.")
286 ] = field.GetTypeName()
287 elif regexXAODEventFormat.match(field.GetTypeName()):
288 meta_dict[filename]["metadata_items"][
289 field.GetFieldName().replace("xAOD__", "xAOD::")
290 ] = field.GetTypeName()
291 elif regexXAODTriggerMenuJson.match(field.GetTypeName()):
292 meta_dict[filename]["metadata_items"][
293 field.GetFieldName()
294 .replace("xAOD__", "xAOD::")
295 .replace("DataVector_", "DataVector<")
296 .replace("__Trigger", ">_Trigger")
297 ] = field.GetTypeName()
298 elif regexXAODTriggerMenuJsonAux.match(field.GetTypeName()):
299 meta_dict[filename]["metadata_items"][
300 field.GetFieldName()
301 .replace("xAOD__", "xAOD::")
302 .replace("Aux:", "Aux.")
303 ] = field.GetTypeName()
304 elif regexXAODCutBookkeeperContainer.match(field.GetTypeName()):
305 meta_dict[filename]["metadata_items"][
306 field.GetFieldName()
307 .replace("xAOD__", "xAOD::")
308 .replace("DataVector_", "DataVector<")
309 .replace("__CutBookkeeper", ">_CutBookkeeper")
310 ] = field.GetTypeName()
311 elif regexXAODCutBookkeeperContainerAux.match(field.GetTypeName()):
312 meta_dict[filename]["metadata_items"][
313 field.GetFieldName()
314 .replace("xAOD__", "xAOD::")
315 .replace("Aux:", "Aux.")
316 ] = field.GetTypeName()
317 else:
318 meta_dict[filename]["metadata_items"][
319 field.GetFieldName().replace("Aux:", "Aux.")
320 ] = field.GetTypeName()
321
322 if field.GetTypeName() in classes_with_aux:
323 # handle aux classes later
324 auxes[field.GetFieldName()] = field.GetTypeName()
325 continue
326
327 if not should_keep_meta(
328 normalizedName, field.GetTypeName(), meta_filter
329 ):
330 continue
331
332 try:
333 meta_dict[filename][normalizedName] = _convert_value(
334 entry[field.GetFieldName()]
335 )
336 except KeyError:
337 msg.warning(f"missing type {field.GetTypeName()}")
338
339 meta_dict[filename]["metadata_items"] = denormalize_metadata_types(
340 meta_dict[filename]["metadata_items"]
341 )
342
343 def _get_aux_base(aux_key: str) -> str:
344 # Remove known prefixes
345 key = aux_key
346 key = key.replace("xAOD__TriggerMenuJsonAuxContainer_v1_", "")
347 key = key.replace("xAOD__FileMetaDataAuxInfo_v1_", "")
348 key = key.replace("xAOD__TruthMetaDataAuxContainer_v1_", "")
349 # Remove known suffixes
350 if key.endswith("Aux:"):
351 key = key[:-4]
352 elif key.endswith("Aux"):
353 key = key[:-3]
354 # Remove any trailing ':' or '_'
355 key = key.strip("_:")
356 return key
357
358 def _get_main_base(main_key: str) -> str:
359 main_base = main_key
360 # For DataVectors
361 if main_key.startswith("DataVector_xAOD__TriggerMenuJson_v1__"):
362 main_base = main_key.replace(
363 "DataVector_xAOD__TriggerMenuJson_v1__", ""
364 )
365 # For FileMetaData
366 elif main_key.startswith("xAOD__FileMetaData_v1_"):
367 main_base = main_key.replace("xAOD__FileMetaData_v1_", "")
368 # For TruthMetaData
369 elif main_key.startswith("DataVector_xAOD__TruthMetaData_v1__"):
370 main_base = main_key.replace(
371 "DataVector_xAOD__TruthMetaData_v1__", ""
372 )
373 return main_base
374
375 def _find_associated_pairs(auxes: dict) -> list[tuple[str, str]]:
376 # Build lookup tables
377 aux_map = {}
378 for k in auxes:
379 if "Aux" in k:
380 aux_map[_get_aux_base(k)] = k
381
382 main_map = {}
383 for k in auxes:
384 base = _get_main_base(k)
385 if base:
386 main_map[base] = k
387
388 # Find pairs
389 pairs = []
390 for base, aux_key in aux_map.items():
391 if base in main_map:
392 pairs.append((aux_key, main_map[base]))
393 return pairs
394
395 for pair in _find_associated_pairs(auxes):
396 return_obj = _convert_value(
397 entry[pair[1]],
398 entry[pair[0]],
399 )
400 key = next(
401 (
402 k
403 for k, v in trigger_menu_json_map.items()
404 if v
405 == pair[1]
406 .replace("xAOD__", "xAOD::")
407 .replace("DataVector_", "DataVector<")
408 .replace("__Trigger", ">_Trigger")
409 ),
410 auxes[pair[0]],
411 )
412
413 try:
414 key = (
415 key.replace("xAOD__", "xAOD::")
416 if key.count("_") <= 1
417 else key.replace("xAOD__", "xAOD::").rsplit("_", 2)[0]
418 )
419 except IndexError:
420 pass
421
422 if not should_keep_meta(
423 pair[0]
424 .replace("xAOD__", "xAOD::")
425 .replace("DataVector_", "DataVector<")
426 .replace("__Trigger", ">_Trigger")
427 .replace("Aux:", "Aux."),
428 key,
429 meta_filter,
430 ):
431 continue
432
433 if "TriggerMenuJson" in pair[0]:
434 if "RAWTriggerMenuJson" in return_obj:
435 key = (
436 pair[1]
437 if pair[0].startswith("Trigger")
438 else trigger_menu_json_map[pair[0]]
439 )
440 meta_dict[filename][key] = return_obj["RAWTriggerMenuJson"]
441 del return_obj["RAWTriggerMenuJson"]
442 if "TriggerConfigInfo" not in meta_dict[filename]:
443 meta_dict[filename]["TriggerConfigInfo"] = {}
444 if "dbkey" in return_obj:
445 meta_dict[filename]["TriggerConfigInfo"][
446 pair[0].split("_")[-1].replace("Aux:", "")
447 ] = {"key": return_obj["dbkey"], "name": return_obj["name"]}
448 del return_obj["dbkey"]
449 del return_obj["name"]
450 if "TriggerMenu" not in meta_dict[filename]:
451 meta_dict[filename]["TriggerMenu"] = {}
452 meta_dict[filename]["TriggerMenu"].update(return_obj)
453 elif "FileMetaData" in pair[0]:
454 if "FileMetaData" not in meta_dict[filename]:
455 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
456 meta_dict[filename]["FileMetaData"].update(return_obj)
457 elif "TruthMetaData" in pair[0]:
458 if pair == ("TruthMetaDataAux:", "TruthMetaData"):
459 if "TruthMetaData" not in meta_dict[filename]:
460 meta_dict[filename]["TruthMetaData"] = {}
461 meta_dict[filename]["TruthMetaData"].update(return_obj)
462 else:
463 # for backward compatibility
464 meta_dict[filename][
465 pair[1]
466 .replace("xAOD__", "xAOD::")
467 .replace("DataVector_", "DataVector<")
468 .replace("__Truth", ">_Truth")
469 ] = {}
470 meta_dict[filename][
471 pair[0]
472 .replace("xAOD__", "xAOD::")
473 .replace("Aux:", "Aux.")
474 ] = {}
475 elif pair == ("CutBookkeepersAux:", "CutBookkeepers"):
476 meta_dict[filename]["CutBookkeepers"] = return_obj
477
478 msg.debug(f"Read metadata from RNTuple: {meta_dict[filename]}")
479
480 else:
481 # ----- read extra metadata required for 'lite' and 'full' modes ----------------------------------------#
482 if mode != 'tiny':
483 # selecting from all tree the only one which contains metadata, respectively "MetaData"
484 metadata_tree = current_file.Get('MetaData')
485 # read all list of branches stored in "MetaData" tree
486 metadata_branches = metadata_tree.GetListOfBranches()
487 nr_of_branches = metadata_branches.GetEntriesFast()
488
489 # object to store the names of metadata containers and their corresponding class name.
490 meta_dict[filename]['metadata_items'] = {}
491
492 meta_filter = get_meta_filter(mode, meta_key_filter)
493
494 # store all persistent classes for metadata container existing in a POOL/ROOT file.
495 persistent_instances = {}
496 dynamic_fmd_items = {}
497
498 # Protect non-Gaudi environments from meta-data classes it doesn't know about
499 if not isGaudiEnv():
500 metadata_tree.SetBranchStatus("*", False)
501
502 for i in range(0, nr_of_branches):
503 branch = metadata_branches.At(i)
504 name = branch.GetName()
505 if name == 'index_ref':
506 # skip the index branch
507 continue
508
509 class_name = branch.GetClassName()
510
511 if regexIOVMetaDataContainer.match(class_name):
512 name = name.replace('IOVMetaDataContainer_p1_', '').replace('_', '/')
513
514 if regexIOVMetaDataContainer.match(class_name):
515 meta_dict[filename]['metadata_items'][name] = 'IOVMetaDataContainer'
516 elif regexByteStreamMetadataContainer.match(class_name):
517 meta_dict[filename]['metadata_items'][name] = 'ByteStreamMetadataContainer'
518 elif regexEventStreamInfo.match(class_name):
519 meta_dict[filename]['metadata_items'][name] = 'EventStreamInfo'
520 elif regexXAODFileMetaData.match(class_name):
521 meta_dict[filename]['metadata_items'][name] = 'FileMetaData'
522 elif regexXAODTruthMetaData.match(class_name):
523 meta_dict[filename]['metadata_items'][name] = 'TruthMetaData'
524 else:
525 type_name = class_name
526 if not type_name:
527 try:
528 type_name = branch.GetListOfLeaves()[0].GetTypeName()
529 except IndexError:
530 pass
531 meta_dict[filename]['metadata_items'][name] = type_name
532
533 if len(meta_filter) > 0:
534 keep = False
535 for filter_key, filter_class in meta_filter.items():
536 if (filter_key.replace('/', '_') in name.replace('/', '_') or filter_key == '*') and fnmatchcase(class_name, filter_class):
537 if 'CutBookkeepers' in filter_key:
538 keep = filter_key == name
539 if keep:
540 break
541 else:
542 keep = True
543 break
544
545 if not keep:
546 continue
547 else:
548 # CutBookkeepers should always be filtered:
549 if 'CutBookkeepers' in name and name not in ['CutBookkeepers', 'CutBookkeepersAux.']:
550 continue
551
552 if not isGaudiEnv():
553 metadata_tree.SetBranchStatus(f"{name}*", True)
554
555 # assign the corresponding persistent class based of the name of the metadata container
556 if regexEventStreamInfo.match(class_name):
557 if class_name.endswith('_p1'):
558 persistent_instances[name] = ROOT.EventStreamInfo_p1()
559 elif class_name.endswith('_p2'):
560 persistent_instances[name] = ROOT.EventStreamInfo_p2()
561 else:
562 persistent_instances[name] = ROOT.EventStreamInfo_p3()
563 elif regexIOVMetaDataContainer.match(class_name):
564 persistent_instances[name] = ROOT.IOVMetaDataContainer_p1()
565 elif regexXAODEventFormat.match(class_name):
566 persistent_instances[name] = ROOT.xAOD.EventFormat_v1()
567 elif regexXAODTriggerMenu.match(class_name) and _check_project() not in ['AthGeneration']:
568 persistent_instances[name] = ROOT.xAOD.TriggerMenuContainer_v1()
569 elif regexXAODTriggerMenuAux.match(class_name) and _check_project() not in ['AthGeneration']:
570 persistent_instances[name] = ROOT.xAOD.TriggerMenuAuxContainer_v1()
571 elif regexXAODTriggerMenuJson.match(class_name) and _check_project() not in ['AthGeneration']:
572 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonContainer_v1()
573 elif regexXAODTriggerMenuJsonAux.match(class_name) and _check_project() not in ['AthGeneration']:
574 persistent_instances[name] = ROOT.xAOD.TriggerMenuJsonAuxContainer_v1()
575 elif regexXAODCutBookkeeperContainer.match(class_name):
576 persistent_instances[name] = ROOT.xAOD.CutBookkeeperContainer_v1()
577 elif regexXAODCutBookkeeperContainerAux.match(class_name):
578 persistent_instances[name] = ROOT.xAOD.CutBookkeeperAuxContainer_v1()
579 elif regexXAODFileMetaData.match(class_name):
580 persistent_instances[name] = ROOT.xAOD.FileMetaData_v1()
581 elif regexXAODFileMetaDataAux.match(class_name):
582 persistent_instances[name] = ROOT.xAOD.FileMetaDataAuxInfo_v1()
583 elif regexXAODTruthMetaData.match(class_name):
584 persistent_instances[name] = ROOT.xAOD.TruthMetaDataContainer_v1()
585 elif regexXAODTruthMetaDataAux.match(class_name):
586 persistent_instances[name] = ROOT.xAOD.TruthMetaDataAuxContainer_v1()
587
588 if name in persistent_instances:
589 branch.SetAddress(ROOT.AddressOf(persistent_instances[name]))
590
591 # This creates a dict to store the dynamic attributes of the xAOD::FileMetaData
592 dynamicFMD = regexXAODFileMetaDataAuxDyn.match(name)
593 if dynamicFMD:
594 dynamicName = dynamicFMD.group().split('.')[-1]
595 dynamicType = regex_cppname.match(class_name)
596 if dynamicType:
597 # this should be a string
598 dynamic_fmd_items[dynamicName] = ROOT.std.string()
599 branch.SetAddress(ROOT.AddressOf(dynamic_fmd_items[dynamicName]))
600 else:
601 dynamic_fmd_items[dynamicName] = None
602
603
604 metadata_tree.GetEntry(0)
605
606 # This loads the dynamic attributes of the xAOD::FileMetaData from the TTree
607 for key in dynamic_fmd_items:
608 if dynamic_fmd_items[key] is None:
609 try:
610 if key.startswith("is"):
611 # this is probably a boolean
612 dynamic_fmd_items[key] = getattr(metadata_tree, key) != '\x00'
613 else:
614 # this should be a float
615 dynamic_fmd_items[key] = getattr(metadata_tree, key)
616 except AttributeError:
617 # should not happen, but just ignore missing attributes
618 pass
619 else:
620 # convert ROOT.std.string objects to python equivalent
621 dynamic_fmd_items[key] = str(dynamic_fmd_items[key])
622
623 # clean the meta-dict if the meta_key_filter flag is used, to return only the key of interest
624 if meta_key_filter:
625 meta_dict[filename] = {}
626
627 # read the metadata
628 for name, content in persistent_instances.items():
629 key = name
630 if hasattr(content, 'm_folderName'):
631 key = content.m_folderName
632
633 # Some transition AODs contain both the Run2 and Run3 metadata formats. We only wish to read the Run3 format if such a file is encountered.
634 has_r3_trig_meta = ('TriggerMenuJson_HLT' in persistent_instances or 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT' in persistent_instances)
635 aux = None
636 if key.startswith('TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (AOD)
637 aux = persistent_instances[key+'Aux.']
638 elif key.startswith('DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_') and not key.endswith('Aux.'): # interface container for the menu (ESD)
639 menuPart = key.split('_')[-1]
640 aux = persistent_instances['xAOD::TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_'+menuPart+'Aux.']
641 elif key == 'TriggerMenu' and 'TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # AOD case (legacy support, HLT and L1 menus)
642 aux = persistent_instances['TriggerMenuAux.']
643 elif key == 'DataVector<xAOD::TriggerMenu_v1>_TriggerMenu' and 'xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.' in persistent_instances and not has_r3_trig_meta: # ESD case (legacy support, HLT and L1 menus)
644 aux = persistent_instances['xAOD::TriggerMenuAuxContainer_v1_TriggerMenuAux.']
645 elif (key == 'CutBookkeepers'
646 and 'CutBookkeepersAux.' in persistent_instances):
647 aux = persistent_instances['CutBookkeepersAux.']
648 elif key == 'CutBookkeepersAux.':
649 continue # Extracted using the interface object
650 elif (key == 'FileMetaData'
651 and 'FileMetaDataAux.' in persistent_instances):
652 aux = persistent_instances['FileMetaDataAux.']
653 elif (key == 'xAOD::FileMetaData_v1_FileMetaData'
654 and 'xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.' in persistent_instances):
655 aux = persistent_instances['xAOD::FileMetaDataAuxInfo_v1_FileMetaDataAux.']
656 elif (key == 'TruthMetaData'
657 and 'TruthMetaDataAux.' in persistent_instances):
658 aux = persistent_instances['TruthMetaDataAux.']
659 elif key == 'TruthMetaDataAux.':
660 continue # Extracted using the interface object
661 elif 'Menu' in key and key.endswith('Aux.'):
662 continue # Extracted using the interface object
663
664 return_obj = _convert_value(content, aux)
665
666 if 'TriggerMenuJson' in key or ('TriggerMenu' in key and not has_r3_trig_meta):
667 if 'RAWTriggerMenuJson' in return_obj:
668 meta_dict[filename][key] = return_obj['RAWTriggerMenuJson']
669 del return_obj['RAWTriggerMenuJson']
670 if 'TriggerConfigInfo' not in meta_dict[filename]:
671 meta_dict[filename]['TriggerConfigInfo'] = {}
672 if 'dbkey' in return_obj:
673 meta_dict[filename]['TriggerConfigInfo'][key.split('_')[-1]] = {
674 'key' : return_obj['dbkey'],
675 'name': return_obj['name']
676 }
677 del return_obj['dbkey']
678 del return_obj['name']
679 if 'TriggerMenu' not in meta_dict[filename]:
680 meta_dict[filename]['TriggerMenu'] = {}
681 meta_dict[filename]['TriggerMenu'].update(return_obj)
682 elif "FileMetaData" in key:
683 if "FileMetaData" not in meta_dict[filename]:
684 meta_dict[filename]["FileMetaData"] = dynamic_fmd_items
685 meta_dict[filename]["FileMetaData"].update(return_obj)
686 else:
687 meta_dict[filename][key] = return_obj
688
689 try:
690 # get the number of events from EventStreamInfo
691 esi_dict = next(key for key, value in meta_dict[filename].items()
692 if isinstance(value, dict) and "numberOfEvents" in value and
693 meta_dict[filename]["metadata_items"][key] == "EventStreamInfo")
694 msg.debug(f"{esi_dict=}")
695 meta_dict[filename]["nentries"] = meta_dict[filename][esi_dict]["numberOfEvents"]
696 except StopIteration as err:
697 msg.debug(f"Caught {err=}, {type(err)=}, falling back on opening the DataHeader"
698 " Container to read the number of entries")
699 meta_dict[filename]['nentries'] = dataheader_nentries(current_file)
700 msg.debug(f"{meta_dict[filename]['nentries']=}")
701
702 if unique_tag_info_values and mode=='iov':
703 unique_tag_info_values = False
704 msg.info('disabling "unique_tag_info_values" option for "iov" mode')
705
706 # This is a required temporary workaround for ATEAM-560, which originated from ATEAM-531
707 # ATEAM-560: https://its.cern.ch/jira/browse/ATEAM-560
708 # ATEAM-531: https://its.cern.ch/jira/browse/ATEAM-531
709 # This change removes all duplicate values present in some files due
710 # to the improper merging of two IOVMetaDataContainers.
711 if unique_tag_info_values:
712 msg.info('MetaReader is called with the parameter "unique_tag_info_values" set to True. '
713 'This is a workaround to remove all duplicate values from "/TagInfo" key')
714 if '/TagInfo' in meta_dict[filename]:
715 for key, value in meta_dict[filename]['/TagInfo'].items():
716 if isinstance(value, list) and value:
717 if len(unique_values := set(value)) > 1:
718 msg.warn(
719 f"Found multiple values for {key}: {value}. "
720 "Looking for possible duplicates."
721 )
722 maybe_ok = False
723 if key == "AMITag":
724 # curate duplicates like: ['s3681_q453', 's3681_q453_'] or ["s3681_q453", "q453_s3681"]
725 unique_amitags = set()
726 for amitags in unique_values:
727 unique_amitags.add(
728 "_".join({tag for tag in amitags.split("_") if tag})
729 )
730 if len(unique_amitags) == 1:
731 maybe_ok = True
732 elif key == "beam_energy":
733 # handle duplicates like: ['6500000', '6500000.0'] or [3, "3"]
734 unique_energies = set()
735 for energy in unique_values:
736 try:
737 energy = int(energy)
738 except ValueError:
739 try:
740 energy = float(energy)
741 except ValueError:
742 pass
743 unique_energies.add(energy)
744 if len(unique_energies) == 1:
745 maybe_ok = True
746 elif key in ["AtlasRelease", "IOVDbGlobalTag", "AODFixVersion"]:
747 maybe_ok = True
748 if maybe_ok:
749 msg.warn(
750 f"Multiple values for {key} may mean the same, or "
751 "the input file was produced in multi-step job. "
752 f"Ignoring all but the first entry: {key} = {value[0]}"
753 )
754 else:
755 raise ValueError(
756 f"{key} from /TagInfo contains more than 1 unique value: {value}"
757 )
758
759 meta_dict[filename]['/TagInfo'][key] = value[0]
760
761 if promote is None:
762 promote = mode == 'lite' or mode == 'peeker'
763
764 # Filter the data and create a prettier output for the 'lite' mode
765 if mode == 'lite':
766 meta_dict = make_lite(meta_dict)
767
768 if mode == 'peeker':
769 meta_dict = make_peeker(meta_dict)
770
771 if promote:
772 meta_dict = promote_keys(meta_dict, mode)
773
774 # If AnalysisBase the itemList must be grabbed another way
775 if not isGaudiEnv():
776 if isinstance(collectionTree, ROOT.TTree):
777 meta_dict[filename]['itemList'] = [ (b.GetClassName(), b.GetName()) for b in collectionTree.GetListOfBranches() ]
778
779 # ----- retrieves metadata from bytestream (BS) files (RAW, DRAW) ------------------------------------------#
780 elif current_file_type == 'BS':
781
782 if ignoreNonExistingLocalFiles and not regex_URI_scheme.match(filename) and not os.path.isfile(filename):
783 msg.warn('Ignoring not accessible file: {}'.format(filename))
784 continue
785
786 import eformat
787
788 # store the number of entries
789 bs = eformat.istream(filename)
790 meta_dict[filename]['nentries'] = bs.total_events
791
792 # store the 'guid' value
793 data_reader = eformat.EventStorage.pickDataReader(filename)
794 assert data_reader, 'problem picking a data reader for file [%s]' % filename
795
796 # set auto flush equivalent, which for BS is always 1
797 meta_dict[filename]['auto_flush'] = 1
798
799 if hasattr(data_reader, 'GUID'):
800 meta_dict[filename]['file_guid'] = data_reader.GUID()
801
802 # compression level and algorithm, for BS always ZLIB
803 meta_dict[filename]['file_comp_alg'] = 1
804 meta_dict[filename]['file_comp_level'] = 1
805
806
807 # unless the mode is "tiny", grab all metadata
808 # ------------------------------------------------------------------------------------------------------#
809 if mode != "tiny":
810 bs_metadata = {}
811
812 for md in data_reader.freeMetaDataStrings():
813 if md.startswith('Event type:'):
814 k = 'eventTypes'
815 v = []
816 if 'is sim' in md:
817 v.append('IS_SIMULATION')
818 else:
819 v.append('IS_DATA')
820
821 if 'is atlas' in md:
822 v.append('IS_ATLAS')
823 else:
824 v.append('IS_TESTBEAM')
825
826 if 'is physics' in md:
827 v.append('IS_PHYSICS')
828 else:
829 v.append('IS_CALIBRATION')
830
831 bs_metadata[k] = tuple(v)
832
833 elif md.startswith('GeoAtlas:'):
834 k = 'geometry'
835 v = md.split('GeoAtlas:')[1].strip()
836 bs_metadata[k] = v
837
838 elif md.startswith('IOVDbGlobalTag:'):
839 k = 'conditions_tag'
840 v = md.split('IOVDbGlobalTag:')[1].strip()
841 bs_metadata[k] = v
842
843 elif '=' in md:
844 k, v = md.split('=', 1) # Split on first '=' only
845 bs_metadata[k] = v
846
847 bs_metadata['detectorMask'] = data_reader.detectorMask()
848 bs_metadata['runNumbers'] = data_reader.runNumber()
849 bs_metadata['lumiBlockNumbers'] = data_reader.lumiblockNumber()
850 bs_metadata['projectTag'] = data_reader.projectTag()
851 bs_metadata['stream'] = data_reader.stream()
852 #bs_metadata['beamType'] = getattr(data_reader, 'beamType')()
853 beamTypeNbr= data_reader.beamType()
854 #According to info from Rainer and Guiseppe the beam type is
855 #0: no beam
856 #1: protons
857 #2: ions
858 if (beamTypeNbr==0): bs_metadata['beamType'] = 'cosmics'
859 elif (beamTypeNbr==1 or beamTypeNbr==2): bs_metadata['beamType'] = 'collisions'
860 else: bs_metadata['beamType'] = 'unknown'
861
862 bs_metadata['beamEnergy'] = data_reader.beamEnergy()
863
864 meta_dict[filename]['eventTypes'] = bs_metadata.get('eventTypes', [])
865 meta_dict[filename]['GeoAtlas'] = bs_metadata.get('geometry', None)
866 meta_dict[filename]['conditions_tag'] = bs_metadata.get('conditions_tag', None)
867 meta_dict[filename]['project_name'] = bs_metadata.get('projectTag', None)
868
869 # Promote up one level
870 meta_dict[filename]['detectorMask'] = [bs_metadata.get('detectorMask', None)]
871 meta_dict[filename]['runNumbers'] = [bs_metadata.get('runNumbers', None)]
872 meta_dict[filename]['lumiBlockNumbers'] = [bs_metadata.get('lumiBlockNumbers', None)]
873 meta_dict[filename]['beam_type'] = bs_metadata.get('beamType', None)
874 meta_dict[filename]['beam_energy'] = bs_metadata.get('beamEnergy', None)
875 meta_dict[filename]['stream'] = bs_metadata.get('stream', None)
876
877 if not data_reader.good():
878 # event-less file...
879 meta_dict[filename]['runNumbers'].append(bs_metadata.get('run_number', 0))
880 meta_dict[filename]['lumiBlockNumbers'].append(bs_metadata.get('LumiBlock', 0))
881
882 msg.debug(f"{meta_dict[filename]=}")
883 msg.debug(f"{len(bs)=}")
884 if len(bs):
885 evt = bs[0]
886 try:
887 evt.check()
888 meta_dict[filename]['processingTags'] = [tag.name for tag in evt.stream_tag()]
889 meta_dict[filename]['evt_number'] = [evt.global_id()]
890 meta_dict[filename]['run_type'] = [eformat.helper.run_type2string(evt.run_type())]
891 # ATLASRECTS-7126: If there is no valid lumiblock information
892 # in the ByteStream header, get the info from the first event.
893 if meta_dict[filename]['lumiBlockNumbers'] == [0]:
894 msg.debug('Taking the luminosity block info from the first event (%i)', evt.lumi_block())
895 meta_dict[filename]['lumiBlockNumbers'] = [evt.lumi_block()]
896 # ATLASRECTS-7126: If there is no valid run number information
897 # in the ByteStream header, get the info from the first event.
898 if meta_dict[filename]['runNumbers'] == [0]:
899 msg.debug('Taking the run number info from the first event (%i)', evt.run_no())
900 meta_dict[filename]['runNumbers'] = [evt.run_no()]
901 except RuntimeError as err:
902 msg.error("Issue while reading the first event of BS file %r: %r", filename, err)
903 else:
904 msg.debug(f"{meta_dict[filename]=}")
905 else:
906 msg.warn(f"Event-less BS {filename=}, will not read metadata information from the first event")
907
908 # fix for ATEAM-122
909 if len(bs_metadata.get('eventTypes', '')) == 0: # see: ATMETADATA-6
910 evt_type = ['IS_DATA', 'IS_ATLAS']
911 if bs_metadata.get('stream', '').startswith('physics_'):
912 evt_type.append('IS_PHYSICS')
913 elif bs_metadata.get('stream', '').startswith('calibration_'):
914 evt_type.append('IS_CALIBRATION')
915 elif bs_metadata.get('projectTag', '').endswith('_calib'):
916 evt_type.append('IS_CALIBRATION')
917 else:
918 evt_type.append('Unknown')
919
920 meta_dict[filename]['eventTypes'] = evt_type
921
922 if mode == 'full':
923 meta_dict[filename]['bs_metadata'] = bs_metadata
924
925 # ------ Throw an error if the user provide other file types -------------------------------------------------#
926 else:
927 msg.error('Unknown filetype for {0} - there is no metadata interface for type {1}'.format(filename, current_file_type))
928 return None
929
930 return meta_dict
931
932
STL class.
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:310

◆ should_keep_meta()

python.MetaReader.should_keep_meta ( normalizedName,
typeName,
meta_filter )
Helper function to determine if metadata should be kept based on meta_filter.

Definition at line 1879 of file MetaReader.py.

def should_keep_meta(normalizedName, typeName, meta_filter):
    """
    Helper function to determine if metadata should be kept based on meta_filter.

    Args:
        normalizedName: name of the metadata object being considered.
        typeName: type name of the metadata object; matched against each
            filter's class pattern with ``fnmatchcase`` (case-sensitive
            shell-style wildcards).
        meta_filter: mapping of ``filter_key -> filter_class`` pattern.
            An empty (or ``None``) filter keeps everything.

    Returns:
        True if at least one filter entry accepts the object, False otherwise.

    A filter entry accepts the object when:
      * its key is ``"*"`` or, after replacing ``"/"`` with ``"_"`` on both
        sides, the key is a substring of ``normalizedName``; and
      * ``typeName`` matches the entry's class pattern; and
      * if the key contains ``"CutBookkeepers"``, the key is exactly equal to
        ``normalizedName`` (a plain substring match is not enough there).
    """
    # No filter at all means "keep everything"; truthiness also covers None.
    if not meta_filter:
        return True

    # Loop-invariant: normalise the candidate name once instead of per filter.
    flat_name = normalizedName.replace("/", "_")

    for filter_key, filter_class in meta_filter.items():
        name_matches = (
            filter_key == "*" or filter_key.replace("/", "_") in flat_name
        )
        if not (name_matches and fnmatchcase(typeName, filter_class)):
            continue

        # CutBookkeepers keys must match the name exactly; on a mismatch keep
        # looking at the remaining filters rather than rejecting outright.
        if "CutBookkeepers" not in filter_key or filter_key == normalizedName:
            return True

    return False

Variable Documentation

◆ lite_primary_keys_to_keep

list python.MetaReader.lite_primary_keys_to_keep
Initial value:
1= [
2 'lumiBlockNumbers', 'runNumbers', 'mc_event_number', 'mc_channel_number',
3 'eventTypes', 'processingTags', 'itemList']

Definition at line 37 of file MetaReader.py.

◆ lite_TagInfo_keys_to_keep

list python.MetaReader.lite_TagInfo_keys_to_keep
Initial value:
1= [
2 'beam_energy', 'beam_type', 'GeoAtlas', 'IOVDbGlobalTag',
3 'AODFixVersion', 'project_name', 'mc_campaign', 'keywords']

Definition at line 40 of file MetaReader.py.

◆ msg

python.MetaReader.msg = logging.getLogger('MetaReader')

Definition at line 13 of file MetaReader.py.

◆ regex_BS_files

python.MetaReader.regex_BS_files = re.compile(r'^(\w+):.*((\.D?RAW\..*)|(\.data$))')

Definition at line 34 of file MetaReader.py.

◆ regex_cppname

python.MetaReader.regex_cppname = re.compile(r'^([\w:]+)(<.*>)?$')

Definition at line 31 of file MetaReader.py.

◆ regex_persistent_class

python.MetaReader.regex_persistent_class = re.compile(r'^([a-zA-Z]+(_[pv]\d+)?::)*[a-zA-Z]+_[pv]\d+$')

Definition at line 33 of file MetaReader.py.

◆ regex_URI_scheme

python.MetaReader.regex_URI_scheme = re.compile(r'^([A-Za-z0-9\+\.\-]+)\:')

Definition at line 35 of file MetaReader.py.

◆ regexByteStreamMetadataContainer

python.MetaReader.regexByteStreamMetadataContainer = re.compile(r'^ByteStreamMetadataContainer(_p\d+)?$')

Definition at line 18 of file MetaReader.py.

◆ regexEventStreamInfo

python.MetaReader.regexEventStreamInfo = re.compile(r'^EventStreamInfo(_p\d+)?$')

Definition at line 16 of file MetaReader.py.

◆ regexIOVMetaDataContainer

python.MetaReader.regexIOVMetaDataContainer = re.compile(r'^IOVMetaDataContainer(_p\d+)?$')

Definition at line 17 of file MetaReader.py.

◆ regexXAODCutBookkeeperContainer

python.MetaReader.regexXAODCutBookkeeperContainer = re.compile(r'^xAOD::CutBookkeeperContainer(_v\d+)?$')

Definition at line 19 of file MetaReader.py.

◆ regexXAODCutBookkeeperContainerAux

python.MetaReader.regexXAODCutBookkeeperContainerAux = re.compile(r'^xAOD::CutBookkeeperAuxContainer(_v\d+)?$')

Definition at line 20 of file MetaReader.py.

◆ regexXAODEventFormat

python.MetaReader.regexXAODEventFormat = re.compile(r'^xAOD::EventFormat(_v\d+)?$')

Definition at line 21 of file MetaReader.py.

◆ regexXAODFileMetaData

python.MetaReader.regexXAODFileMetaData = re.compile(r'^xAOD::FileMetaData(_v\d+)?$')

Definition at line 22 of file MetaReader.py.

◆ regexXAODFileMetaDataAux

python.MetaReader.regexXAODFileMetaDataAux = re.compile(r'^xAOD::FileMetaDataAuxInfo(_v\d+)?$')

Definition at line 23 of file MetaReader.py.

◆ regexXAODFileMetaDataAuxDyn

python.MetaReader.regexXAODFileMetaDataAuxDyn = re.compile(r'^(xAOD::)?FileMetaData.*AuxDyn(\.[a-zA-Z0-9]+)?$')

Definition at line 24 of file MetaReader.py.

◆ regexXAODTriggerMenu

python.MetaReader.regexXAODTriggerMenu = re.compile(r'^DataVector<xAOD::TriggerMenu(_v\d+)?>$')

Definition at line 25 of file MetaReader.py.

◆ regexXAODTriggerMenuAux

python.MetaReader.regexXAODTriggerMenuAux = re.compile(r'^xAOD::TriggerMenuAuxContainer(_v\d+)?$')

Definition at line 26 of file MetaReader.py.

◆ regexXAODTriggerMenuJson

python.MetaReader.regexXAODTriggerMenuJson = re.compile(r'^DataVector<xAOD::TriggerMenuJson(_v\d+)?>$')

Definition at line 27 of file MetaReader.py.

◆ regexXAODTriggerMenuJsonAux

python.MetaReader.regexXAODTriggerMenuJsonAux = re.compile(r'^xAOD::TriggerMenuJsonAuxContainer(_v\d+)?$')

Definition at line 28 of file MetaReader.py.

◆ regexXAODTruthMetaData

python.MetaReader.regexXAODTruthMetaData = re.compile(r'^DataVector<xAOD::TruthMetaData(_v\d+)?>$')

Definition at line 29 of file MetaReader.py.

◆ regexXAODTruthMetaDataAux

python.MetaReader.regexXAODTruthMetaDataAux = re.compile(r'^xAOD::TruthMetaDataAuxContainer(_v\d+)?$')

Definition at line 30 of file MetaReader.py.

◆ trigger_keys

list python.MetaReader.trigger_keys
Initial value:
1= [
2 'TriggerConfigInfo',
3 'TriggerMenu', 'TriggerMenuJson_BG', 'TriggerMenuJson_HLT', 'TriggerMenuJson_HLTMonitoring', 'TriggerMenuJson_HLTPS', 'TriggerMenuJson_L1', 'TriggerMenuJson_L1PS',
4 '/TRIGGER/HLT/Groups', '/TRIGGER/HLT/HltConfigKeys', '/TRIGGER/HLT/Menu', '/TRIGGER/HLT/PrescaleKey', '/TRIGGER/HLT/Prescales',
5 '/TRIGGER/LVL1/ItemDef', '/TRIGGER/LVL1/Lvl1ConfigKey', '/TRIGGER/LVL1/Menu', '/TRIGGER/LVL1/Prescales', '/TRIGGER/LVL1/Thresholds',
6 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenu', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT',
7 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS',
8 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1', 'DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS',
9]

Definition at line 44 of file MetaReader.py.

◆ trigger_menu_json_map

dict python.MetaReader.trigger_menu_json_map
Initial value:
1= {
2 "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1PSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS",
3 "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_BGAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG",
4 "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT",
5 "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTMonitoringAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring",
6 "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_HLTPSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS",
7 "xAOD__TriggerMenuJsonAuxContainer_v1_TriggerMenuJson_L1Aux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1",
8 "TriggerMenuJson_L1PSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1PS",
9 "TriggerMenuJson_BGAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_BG",
10 "TriggerMenuJson_HLTAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLT",
11 "TriggerMenuJson_HLTMonitoringAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTMonitoring",
12 "TriggerMenuJson_HLTPSAux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_HLTPS",
13 "TriggerMenuJson_L1Aux:": "DataVector<xAOD::TriggerMenuJson_v1>_TriggerMenuJson_L1",
14}

Definition at line 54 of file MetaReader.py.