ATLAS Offline Software
FilePeekerTool.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2 
3 # @file PyUtils.FilePeekerTool
4 # @purpose peek into APR files to read in-file metadata without Athena (based on PyAthena.FilePeekerLib code by Sebastian Binet)
5 # @author Alexandre Vaniachine <vaniachine@anl.gov>
6 # @date May 2015
7 
8 __author__ = "Alexandre Vaniachine <vaniachine@anl.gov>"
9 __doc__ = "peek into APR files to read in-file metadata"
10 
11 
12 import os
13 from CLIDComps.clidGenerator import clidGenerator
# Module-level CLID generator; _do_peeking calls cgen.getNameFromClid() to turn
# the class IDs found in the event item list into class names.
# NOTE(review): the meaning of the empty-string argument is not visible here -
# presumably "use the default CLID database"; confirm against CLIDComps.
cgen = clidGenerator("")
15 
17  """utility algorithm to inspect a file's content
18  """
19 
20  def __init__(self, f_root):
21  self.f = f_root
22  self.peeked_data = {}
23 
24  def run(self,writelog=False):
25  self.peeked_data = self._do_peeking(writelog)
26  return self._save_db_cache()
27 
28  def getPeekedData(self,key,writelog=False):
29  self.peeked_data = self._do_peeking(writelog)
30  if key in self.peeked_data.keys():
31  return self.peeked_data[key]
32  return
33 
34  def _do_peeking(self,writelog):
35  """ the real function doing all the work of peeking at the APR file
36  @return a dict of peeked-at data
37  """
38 
39  if writelog:
40  import uuid
41  stdout_fname = (
42  'peeker-tool-%i-%s.log.txt' %
43  (os.getpid(), uuid.uuid4())
44  )
45  stdout = open(stdout_fname, "w")
46  print ("Extracting in-file metadata without athena sub-process from file", self.f.GetName(), file=stdout)
47  print ("="*80, file=stdout)
48  stdout.flush()
49 
50  pool = self.f.Get("##Params")
51  import re
52  pool_token = re.compile(r'[\[]NAME=(?P<name>.*?)[]]' r'[\[]VALUE=(?P<value>.*?)[]]').match
53  params = []
54  for i in range(pool.GetEntries()):
55  if pool.GetEntry(i)>0:
56  match = pool_token(pool.FindLeaf("db_string").GetValueString())
57  if not match:
58  continue
59  d = match.groupdict()
60  params.append((d['name'], d['value']))
61  if d['name'].lower() == 'fid':
62  guid = d['value']
63 
64  meta = self.f.Get( 'MetaData' )
65  if not meta:
66  if writelog:
67  print ('No metadata', file=stdout)
68  return {}
69 
70  from AthenaPython.FilePeekerLib import toiter
71 
72  meta.GetEntry( 0 )
73 
74  esiName= 'Stream'
75  esiTypeName = 'EventStreamInfo'
76  for l in meta.GetListOfLeaves():
77  if l.GetTypeName().startswith(esiTypeName):
78  esiTypeName = l.GetTypeName()
79  esiName = l.GetName()
80  break
81 
82  if esiTypeName != 'EventStreamInfo_p3':
83  if writelog:
84  print ("old schema is not supported:", esiTypeName, file=stdout)
85  return {}
86 
87  import cppyy
88 
89  esic = cppyy.gbl.EventStreamInfoPTCnv_p3()
90  esi = getattr (meta, esiName)
91  if esiName.startswith(esiTypeName):
92  esiName = esiName[len(esiTypeName)+1:]
93 
94  peeked_data = {}
95 
96  if guid:
97  peeked_data['file_guid'] = guid
98 
99  peeked_data['run_type'] = ['N/A']
100 
101  if writelog:
102  print (peeked_data, file=stdout)
103 
104  peeked_data['nentries'] = esic.getNumberOfEvents(esi)
105  peeked_data['lumi_block'] = list(esic.lumiBlockNumbers(esi))
106  peeked_data['run_number'] = list(esic.runNumbers(esi))
107  peeked_data['stream_names'] = [s for s in esic.processingTags(esi)]
108 
109  item_list = list( (cgen.getNameFromClid(i.first), str(i.second)) for i in esic.itemList(esi))
110  #reorder items to match that of StoreGate
111  if ('DataHeader', esiName) in item_list:
112  item_list.remove(('DataHeader', esiName))
113  item_list.append(('DataHeader', esiName))
114 
115  peeked_data['eventdata_items'] = item_list
116 
117  etc = cppyy.gbl.EventTypePTCnv_p3()
118  def bit_mask(self):
119  raw_bit_mask=etc.bit_mask(self)
120  bit_mask_typecodes = [ ('IS_DATA','IS_SIMULATION'), ('IS_ATLAS', 'IS_TESTBEAM'), ('IS_PHYSICS','IS_CALIBRATION'),]
121  def decode_bitmask(idx):
122  if len(raw_bit_mask) <= idx:
123  return bit_mask_typecodes[idx][0]
124  isa_idx = raw_bit_mask[idx]
125  return bit_mask_typecodes[idx][isa_idx]
126  bm = map(decode_bitmask,
127  range(len(bit_mask_typecodes)))
128  return tuple(bm)
129 
130  def _get_detdescr_tags(evt_type):
131  ddt = evt_type.get_detdescr_tags().split()
132  # det_descr_tags is of the form:
133  # [ 'key1', 'val1', 'key2', 'val2', ... ]
134  ddt = dict(zip(ddt[0::2], # select 'key?'
135  ddt[1::2])) # select 'val?'
136  return ddt
137 
138  etp=esic.eventTypes(esi)
139  if etp.size() > 0:
140  et=etp[0]
141 
142  peeked_data['evt_type'] = bit_mask(et)
143 
144  peeked_data['mc_channel_number'] = [et.m_mc_channel_number]
145  peeked_data['evt_number'] = [et.m_mc_event_number]
146  if writelog:
147  print ('mc_event_weights.size:', et.m_mc_event_weights.size(), file=stdout)
148  print ('mc_event_weights value', et.m_mc_event_weights[0], file=stdout)
149  print ('user_type', et.m_user_type, file=stdout)
150 
151  # handle event-less files
152  if peeked_data['nentries'] == 0:
153  peeked_data['stream_names'] = [esiName]
154 
155  # more event-less files handling - APR-from-BS-event-less files
156  # see bug#98568
157  if len (peeked_data['run_number']) == 0 and meta.FindBranch('ByteStreamMetadata'):
158  bsmd = cppyy.gbl.ByteStreamMetadataPTCnv_p1()
159  peeked_data['lumi_block'] = [bsmd.getLumiBlock(meta.ByteStreamMetadata[0])]
160  peeked_data['run_number'] = [bsmd.getRunNumber(meta.ByteStreamMetadata[0])]
161  peeked_data['beam_type'] = [bsmd.getBeamType(meta.ByteStreamMetadata[0])]
162  peeked_data['beam_energy'] = [bsmd.getBeamEnergy(meta.ByteStreamMetadata[0])]
163  bs_metadata = {}
164  for md in bsmd.getFreeMetaDataStrings(meta.ByteStreamMetadata[0]):
165  if md.startswith('Event type:'):
166  k = 'evt_type'
167  v = []
168  if 'is sim' in md: v.append('IS_SIMULATION')
169  else: v.append('IS_DATA')
170  if 'is atlas' in md: v.append('IS_ATLAS')
171  else: v.append('IS_TESTBEAM')
172  if 'is physics' in md: v.append('IS_PHYSICS')
173  else: v.append('IS_CALIBRATION')
174  bs_metadata[k] = tuple(v)
175  elif md.startswith('GeoAtlas:'):
176  k = 'geometry'
177  v = md.split('GeoAtlas:')[1].strip()
178  bs_metadata[k] = v
179  elif md.startswith('IOVDbGlobalTag:'):
180  k = 'conditions_tag'
181  v = md.split('IOVDbGlobalTag:')[1].strip()
182  bs_metadata[k] = v
183  elif '=' in md:
184  k,v = md.split('=')
185  bs_metadata[k] = v
186  pass
187 
188  # ATMETADATA-6: without FreeMetaDataStrings this must be BS file from SFO
189  bs_metadata['Stream'] = bsmd.getStream(meta.ByteStreamMetadata[0])
190  bs_metadata['Project'] = bsmd.getProject(meta.ByteStreamMetadata[0])
191  if len(bs_metadata.get('evt_type','')) == 0 :
192  evt_type = ['IS_DATA', 'IS_ATLAS']
193  if bs_metadata.get('Stream', '').startswith('physics_'):
194  evt_type.append('IS_PHYSICS')
195  elif bs_metadata.get('Stream', '').startswith('calibration_'):
196  evt_type.append('IS_CALIBRATION')
197  elif bs_metadata.get('Project', '').endswith('_calib'):
198  evt_type.append('IS_CALIBRATION')
199  else:
200  evt_type.append('Unknown')
201  bs_metadata['evt_type'] = evt_type
202 
203  peeked_data['evt_type'] = bs_metadata.get('evt_type', [])
204  peeked_data['geometry'] = bs_metadata.get('geometry', None)
205  peeked_data['conditions_tag'] = bs_metadata.get('conditions_tag', None)
206  peeked_data['bs_metadata'] = bs_metadata
207  pass
208 
209  cnv = cppyy.gbl.IOVMetaDataContainerPTCnv_p1()
210 
211  obj = cppyy.gbl.IOVMetaDataContainer()
212 
213  def process_metadata(obj, metadata_name):
214  if writelog:
215  print ('processing container [%s]' % obj.folderName(), file=stdout)
216  data = []
217  payloads = obj.payloadContainer()
218  payloads_sz = payloads.size()
219  if hasattr(payloads, 'at'):
220  # HACK for bug #77976
221  _tmp = payloads
222  payloads = []
223  for ii in range(payloads_sz):
224  payloads.append(_tmp.at(ii))
225  pass
226  for ii,payload in zip(range(payloads_sz), payloads):
227  if not payload:
228  if writelog:
229  print ("**error** null-pointer ?", file=stdout)
230  continue
231  # names
232  chan_names = []
233  sz = payload.name_size()
234  if writelog:
235  print ('==names== (sz: %s)' % sz, file=stdout)
236  for idx in range(sz):
237  chan = payload.chanNum(idx)
238  chan_name = payload.chanName(chan)
239  chan_names.append(chan_name)
240 
241  if 1: # we don't really care about those...
242  # iovs
243  sz = payload.iov_size()
244  if writelog:
245  print ('==iovs== (sz: %s)' % sz, file=stdout)
246  for idx in range(sz):
247  chan = payload.chanNum(idx)
248  iov_range = payload.iovRange(chan)
249  iov_start = iov_range.start()
250  iov_stop = iov_range.stop()
251  if writelog:
252  print ('(%s, %s) => (%s, %s) valid=%s runEvt=%s' % (
253  iov_start.run(),
254  iov_start.event(),
255  iov_stop.run(),
256  iov_stop.event(),
257  iov_start.isValid(),
258  iov_start.isRunEvent()),
259  file=stdout)
260 
261  # attrs
262  attrs = [] # can't use a dict as spec.name() isn't unique
263  sz = payload.size()
264  if writelog:
265  print ('==attrs== (sz: %s)' % sz, file=stdout)
266  for idx in range(sz):
267  chan = payload.chanNum(idx)
268  attr_list = payload.attributeList(chan)
269  attr_data = []
270  for a in list(toiter(attr_list.begin(), attr_list.end())):
271  spec = a.specification()
272  a_type = spec.typeName()
273  if a_type.find('string') >= 0:
274  a_data = a.data['string']()
275  try:
276  a_data = eval(a_data,{},{})
277  except Exception:
278  # swallow and keep as a string
279  pass
280  else:
281  a_data = a.data[a_type]()
282  attr_data.append( (spec.name(), a_data) )
283  attrs.append(dict(attr_data))
284  if len(attrs) == len(chan_names):
285  data.append(dict(zip(chan_names,attrs)))
286  else:
287  if len(attrs):
288  if len(attrs) == 1:
289  data.append(attrs[0])
290  else:
291  data.append(attrs)
292  else:
293  data.append(chan_names)
294  pass # loop over payloads...
295  return data
296 
297  metadata = {}
298  def maybe_get(o, idx, default=None):
299  try:
300  return o[idx]
301  except IndexError:
302  return default
303  def maybe_float(o):
304  try:
305  return float(o)
306  except ValueError:
307  return o
308 
309  # FIXME - read metadata_items from MetaDataHdr
310  metadata_items = []
311 
312  for l in meta.GetListOfLeaves():
313  clName = l.GetTypeName().split('_p')[0]
314  flName = l.GetName()
315  if l.GetTypeName() == 'IOVMetaDataContainer_p1':
316  k = l.GetName()
317  pers = getattr (meta, k)
318  cnv.persToTrans(pers,obj)
319  if 0:
320  try:
321  obj.payloadContainer().at(0).dump()
322  except Exception:
323  if writelog:
324  print (l.GetName(), file=stdout)
325  pass
326  v = process_metadata(obj, k)
327  flName = obj.folderName()
328  metadata[obj.folderName()] = maybe_get(v, -1)
329  if flName[:11] in ['TriggerMenu','CutBookkeep','IncompleteC'] and clName[:6] != 'xAOD::': continue
330  metadata_items.append((clName,flName))
331  if clName == 'EventStreamInfo':
332  metadata_items.append(('DataHeader', ';00;MetaDataSvc'))
333 
334  peeked_data['metadata'] = metadata
335  peeked_data['metadata_items'] = metadata_items
336 
337  # try to make unique Stream Tags set (but not for eventless files)
338  if '/TRIGGER/HLT/Menu' in metadata and peeked_data['nentries'] > 0:
339  ti = set()
340  for d in metadata.get('/TRIGGER/HLT/Menu'):
341  ti.add(d['StreamInfo'])
342 
343  # build stream_tags from /TRIGGER/HLT/Menu metadata
344  stream_tags = []
345  for item in ti:
346  i = item.split(',')
347  if len(i)!=3:
348  if writelog:
349  print('**error** Invalid StreamInfo entry:',item, file=stdout)
350  else:
351  stream_tags.append( { 'obeys_lbk':bool(i[2]), 'stream_name':i[0], 'stream_type':i[1] } )
352 
353  peeked_data['stream_tags'] = stream_tags
354 
355  from PyUtils.Helpers import release_metadata
356  rel_metadata = release_metadata()
357  project = rel_metadata['project name']
358  version = rel_metadata['release']
359  release = project + '-' + version
360 
361 
362  taginfo = {}
363  if '/TagInfo' in metadata:
364  taginfo = metadata['/TagInfo'].copy()
365 
366  # but not for eventless files
367  if peeked_data['nentries'] > 0:
368  taginfo['AtlasRelease'] = release
369 
370  peeked_data['tag_info'] = taginfo
371  if taginfo:
372  peeked_data['det_descr_tags'] = taginfo
373  peeked_data['geometry'] = taginfo.get('GeoAtlas', None)
374  peeked_data['conditions_tag'] = taginfo.get('IOVDbGlobalTag', None)
375  peeked_data['beam_type'] = [taginfo.get('beam_type','N/A')]
376  peeked_data['beam_energy']= [maybe_float(taginfo.get('beam_energy',
377  'N/A'))]
378 
379  if 'evt_type' not in peeked_data: # must be eventless MC file
380  if '/Simulation/Parameters' in metadata:
381  peeked_data['evt_type'] = ['IS_SIMULATION', 'IS_ATLAS', 'IS_PHYSICS']
382  peeked_data['run_number'] = [metadata['/Simulation/Parameters'].get('RunNumber','')]
383  else:
384  peeked_data['evt_type'] = []
385 
386  if 'geometry' not in peeked_data:
387  peeked_data['geometry'] = None
388  if 'conditions_tag' not in peeked_data:
389  peeked_data['conditions_tag'] = None
390  if 'det_descr_tags' not in peeked_data:
391  peeked_data['det_descr_tags'] = {}
392 
393  if writelog:
394 
395  print (':::::: summary ::::::', file=stdout)
396  print (' - nbr events: %s' % peeked_data['nentries'], file=stdout)
397  print (' - run numbers: %s' % peeked_data['run_number'], file=stdout)
398  print (' - lumiblocks: %s' % peeked_data['lumi_block'], file=stdout)
399  print (' - evt types: ', peeked_data['evt_type'], file=stdout)
400  print (' - item list: %s' % len(peeked_data['eventdata_items']), file=stdout)
401  print (' - processing tags: %s' % peeked_data['stream_names'], file=stdout)
402  print (' - geometry: %s' % peeked_data['geometry'], file=stdout)
403  print (' - conditions tag: %s' % peeked_data['conditions_tag'], file=stdout)
404  print (' - tag-info: %s' % peeked_data['tag_info'].keys(), file=stdout)
405  stdout.flush()
406  stdout.close()
407 
408  return peeked_data
409 
410  def _save_db_cache(self):
411  """save file informations using sqlite"""
412  import tempfile
413  fd_pkl,out_pkl_fname = tempfile.mkstemp(suffix='.pkl')
414  os.close(fd_pkl)
415  if os.path.exists(out_pkl_fname):
416  os.remove(out_pkl_fname)
417  oname = os.path.expanduser(os.path.expandvars(out_pkl_fname))
418  if os.path.exists(oname):
419  os.remove(oname)
420 
421  import PyUtils.dbsqlite as dbsqlite
422  db = dbsqlite.open(oname,flags='w')
423  db['fileinfos'] = self.peeked_data
424  db.close()
425 
426  if os.path.exists(oname) and len(self.peeked_data) > 0:
427  return 0, out_pkl_fname
428  else:
429  os.remove(oname)
430  return 1, out_pkl_fname
python.Helpers.release_metadata
def release_metadata()
Definition: Tools/PyUtils/python/Helpers.py:129
python.FilePeekerTool.FilePeekerTool.__init__
def __init__(self, f_root)
Definition: FilePeekerTool.py:20
python.FilePeekerTool.FilePeekerTool.peeked_data
peeked_data
Definition: FilePeekerTool.py:22
python.FilePeekerLib.toiter
def toiter(beg, end)
Definition: FilePeekerLib.py:23
run_Egamma1_LArStrip_Fex.dump
dump
Definition: run_Egamma1_LArStrip_Fex.py:87
Get
T * Get(TFile &f, const std::string &n, const std::string &dir="", const chainmap_t *chainmap=0, std::vector< std::string > *saved=0)
get a histogram given a path, and an optional initial directory if histogram is not found,...
Definition: comparitor.cxx:181
python.FilePeekerTool.FilePeekerTool.getPeekedData
def getPeekedData(self, key, writelog=False)
Definition: FilePeekerTool.py:28
python.FilePeekerTool.FilePeekerTool._do_peeking
def _do_peeking(self, writelog)
Definition: FilePeekerTool.py:34
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:194
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
python.FilePeekerTool.FilePeekerTool._save_db_cache
def _save_db_cache(self)
Definition: FilePeekerTool.py:410
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
Trk::open
@ open
Definition: BinningType.h:40
python.FilePeekerTool.FilePeekerTool.run
def run(self, writelog=False)
Definition: FilePeekerTool.py:24
python.Bindings.bit_mask
bit_mask
Definition: Control/AthenaPython/python/Bindings.py:659
get
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition: hcg.cxx:127
str
Definition: BTagTrackIpAccessor.cxx:11
python.Bindings.keys
keys
Definition: Control/AthenaPython/python/Bindings.py:801
python.FilePeekerTool.FilePeekerTool.f
f
Definition: FilePeekerTool.py:21
calibdata.copy
bool copy
Definition: calibdata.py:26
xAOD::bool
setBGCode setTAP setLVL2ErrorBits bool
Definition: TrigDecision_v1.cxx:60
python.FilePeekerTool.FilePeekerTool
Definition: FilePeekerTool.py:16
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
python.LArMinBiasAlgConfig.float
float
Definition: LArMinBiasAlgConfig.py:65