ATLAS Offline Software
FilePeekerTool.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 # @file PyUtils.FilePeekerTool
4 # @purpose peek into APR files to read in-file metadata without Athena (based on PyAthena.FilePeekerLib code by Sebastian Binet)
5 # @author Alexandre Vaniachine <vaniachine@anl.gov>
6 # @date May 2015
7 
8 from __future__ import print_function
9 
10 __author__ = "Alexandre Vaniachine <vaniachine@anl.gov>"
11 __doc__ = "peek into APR files to read in-file metadata"
12 
13 
14 import os
15 from CLIDComps.clidGenerator import clidGenerator
# Module-level CLID generator; _do_peeking calls cgen.getNameFromClid() to
# turn the class IDs found in the event item list into class names.
cgen = clidGenerator("")
17 
19  """utility algorithm to inspect a file's content
20  """
21 
def __init__(self, f_root):
    """Remember the file to inspect and start with no peeked data.

    @param f_root  file object to peek into; the other methods call its
                   Get() and GetName() methods
    """
    # filled by run() / getPeekedData()
    self.peeked_data = dict()
    self.f = f_root
25 
def run(self, writelog=False):
    """Peek at the file, keep the result on the instance and persist it.

    @param writelog  forwarded unchanged to _do_peeking
    @return the value returned by _save_db_cache
    """
    peeked = self._do_peeking(writelog)
    # _save_db_cache reads self.peeked_data, so store it before saving
    self.peeked_data = peeked
    status = self._save_db_cache()
    return status
29 
def getPeekedData(self, key, writelog=False):
    """Peek at the file again and return a single entry of the result.

    The file is re-inspected on every call and self.peeked_data is
    replaced with the fresh result.

    @param key       name of the peeked-data entry to return
    @param writelog  forwarded unchanged to _do_peeking
    @return the value stored under `key`, or None when there is no such key
    """
    fresh = self._do_peeking(writelog)
    self.peeked_data = fresh
    return fresh.get(key)
35 
def _do_peeking(self, writelog):
    """ the real function doing all the work of peeking at the APR file

    When `writelog` is true a log file named
    'peeker-tool-<pid>-<uuid>.log.txt' is created (relative to the current
    working directory). It is closed on every exit path, including the
    early returns and any exception raised while peeking.

    @param writelog  if true, write a diagnostic log while peeking
    @return a dict of peeked-at data; empty when the file has no 'MetaData'
            object or its EventStreamInfo schema is not 'EventStreamInfo_p3'
    """
    if not writelog:
        return self._peek_metadata(False, None)

    import uuid
    stdout_fname = (
        'peeker-tool-%i-%s.log.txt' %
        (os.getpid(), uuid.uuid4())
        )
    # the context manager guarantees the log is flushed and closed even if
    # peeking returns early or raises
    with open(stdout_fname, "w") as stdout:
        print ("Extracting in-file metadata without athena sub-process from file", self.f.GetName(), file=stdout)
        print ("="*80, file=stdout)
        stdout.flush()
        return self._peek_metadata(True, stdout)

def _peek_metadata(self, writelog, stdout):
    """Read the in-file metadata of self.f; helper of _do_peeking.

    @param writelog  if true, diagnostics are printed to `stdout`
    @param stdout    open, writable log file (only used when writelog is true);
                     it is owned and closed by the caller
    @return a dict of peeked-at data, or {} when nothing can be peeked
    """
    import re

    # -- file GUID from the '##Params' entries -----------------------------
    # must be pre-set: files without a 'FID' entry would otherwise leave
    # `guid` unbound and break the `if guid:` test further down
    guid = None
    pool = self.f.Get("##Params")
    pool_token = re.compile(r'[\[]NAME=(?P<name>.*?)[]]' r'[\[]VALUE=(?P<value>.*?)[]]').match
    if pool:
        for i in range(pool.GetEntries()):
            if pool.GetEntry(i)>0:
                match = pool_token(pool.FindLeaf("db_string").GetValueString())
                if not match:
                    continue
                d = match.groupdict()
                if d['name'].lower() == 'fid':
                    guid = d['value']

    meta = self.f.Get( 'MetaData' )
    if not meta:
        if writelog:
            print ('No metadata', file=stdout)
        return {}

    from AthenaPython.FilePeekerLib import toiter

    meta.GetEntry( 0 )

    # -- locate the EventStreamInfo leaf -----------------------------------
    esiName = 'Stream'
    esiTypeName = 'EventStreamInfo'
    for leaf in meta.GetListOfLeaves():
        if leaf.GetTypeName().startswith(esiTypeName):
            esiTypeName = leaf.GetTypeName()
            esiName = leaf.GetName()
            break

    if esiTypeName != 'EventStreamInfo_p3':
        if writelog:
            print ("old schema is not supported:", esiTypeName, file=stdout)
        return {}

    import cppyy

    esic = cppyy.gbl.EventStreamInfoPTCnv_p3()
    esi = getattr (meta, esiName)
    if esiName.startswith(esiTypeName):
        # strip the '<type name>' prefix and the one separator character after it
        esiName = esiName[len(esiTypeName)+1:]

    peeked_data = {}

    if guid:
        peeked_data['file_guid'] = guid

    peeked_data['run_type'] = ['N/A']

    if writelog:
        print (peeked_data, file=stdout)

    peeked_data['nentries'] = esic.getNumberOfEvents(esi)
    peeked_data['lumi_block'] = list(esic.lumiBlockNumbers(esi))
    peeked_data['run_number'] = list(esic.runNumbers(esi))
    peeked_data['stream_names'] = list(esic.processingTags(esi))

    item_list = [(cgen.getNameFromClid(i.first), i.second) for i in esic.itemList(esi)]
    #reorder items to match that of StoreGate
    if ('DataHeader', esiName) in item_list:
        item_list.remove(('DataHeader', esiName))
        item_list.append(('DataHeader', esiName))

    peeked_data['eventdata_items'] = item_list

    # -- event type --------------------------------------------------------
    etc = cppyy.gbl.EventTypePTCnv_p3()
    def bit_mask(pers_evt_type):
        raw_bit_mask = etc.bit_mask(pers_evt_type)
        bit_mask_typecodes = [ ('IS_DATA','IS_SIMULATION'), ('IS_ATLAS', 'IS_TESTBEAM'), ('IS_PHYSICS','IS_CALIBRATION'),]
        def decode_bitmask(idx):
            # bits missing from the raw mask decode to the first alternative
            if len(raw_bit_mask) <= idx:
                return bit_mask_typecodes[idx][0]
            isa_idx = raw_bit_mask[idx]
            return bit_mask_typecodes[idx][isa_idx]
        return tuple(decode_bitmask(idx) for idx in range(len(bit_mask_typecodes)))

    etp = esic.eventTypes(esi)
    if etp.size() > 0:
        et = etp[0]

        peeked_data['evt_type'] = bit_mask(et)

        peeked_data['mc_channel_number'] = [et.m_mc_channel_number]
        peeked_data['evt_number'] = [et.m_mc_event_number]
        if writelog:
            n_weights = et.m_mc_event_weights.size()
            print ('mc_event_weights.size:', n_weights, file=stdout)
            # only index the weights when there is at least one
            if n_weights > 0:
                print ('mc_event_weights value', et.m_mc_event_weights[0], file=stdout)
            print ('user_type', et.m_user_type, file=stdout)

    # handle event-less files
    if peeked_data['nentries'] == 0:
        peeked_data['stream_names'] = [esiName]

    # more event-less files handling - APR-from-BS-event-less files
    # see bug#98568
    if len (peeked_data['run_number']) == 0 and meta.FindBranch('ByteStreamMetadata'):
        bsmd = cppyy.gbl.ByteStreamMetadataPTCnv_p1()
        bs_pers = meta.ByteStreamMetadata[0]
        peeked_data['lumi_block'] = [bsmd.getLumiBlock(bs_pers)]
        peeked_data['run_number'] = [bsmd.getRunNumber(bs_pers)]
        peeked_data['beam_type'] = [bsmd.getBeamType(bs_pers)]
        peeked_data['beam_energy'] = [bsmd.getBeamEnergy(bs_pers)]
        bs_metadata = {}
        for md in bsmd.getFreeMetaDataStrings(bs_pers):
            if md.startswith('Event type:'):
                bs_metadata['evt_type'] = (
                    'IS_SIMULATION' if 'is sim' in md else 'IS_DATA',
                    'IS_ATLAS' if 'is atlas' in md else 'IS_TESTBEAM',
                    'IS_PHYSICS' if 'is physics' in md else 'IS_CALIBRATION',
                    )
            elif md.startswith('GeoAtlas:'):
                bs_metadata['geometry'] = md.split('GeoAtlas:')[1].strip()
            elif md.startswith('IOVDbGlobalTag:'):
                bs_metadata['conditions_tag'] = md.split('IOVDbGlobalTag:')[1].strip()
            elif '=' in md:
                # split on the first '=' only: the value may itself contain '='
                k,v = md.split('=', 1)
                bs_metadata[k] = v

        # ATMETADATA-6: without FreeMetaDataStrings this must be BS file from SFO
        bs_metadata['Stream'] = bsmd.getStream(bs_pers)
        bs_metadata['Project'] = bsmd.getProject(bs_pers)
        if len(bs_metadata.get('evt_type','')) == 0 :
            evt_type = ['IS_DATA', 'IS_ATLAS']
            if bs_metadata.get('Stream', '').startswith('physics_'):
                evt_type.append('IS_PHYSICS')
            elif bs_metadata.get('Stream', '').startswith('calibration_'):
                evt_type.append('IS_CALIBRATION')
            elif bs_metadata.get('Project', '').endswith('_calib'):
                evt_type.append('IS_CALIBRATION')
            else:
                evt_type.append('Unknown')
            bs_metadata['evt_type'] = evt_type

        peeked_data['evt_type'] = bs_metadata.get('evt_type', [])
        peeked_data['geometry'] = bs_metadata.get('geometry', None)
        peeked_data['conditions_tag'] = bs_metadata.get('conditions_tag', None)
        peeked_data['bs_metadata'] = bs_metadata

    # -- IOV metadata containers -------------------------------------------
    cnv = cppyy.gbl.IOVMetaDataContainerPTCnv_p1()

    obj = cppyy.gbl.IOVMetaDataContainer()

    def process_metadata(obj, metadata_name):
        """Convert the payloads of one IOVMetaDataContainer into python data.

        Returns a list with one entry per non-null payload.
        """
        if writelog:
            print ('processing container [%s]' % obj.folderName(), file=stdout)
        data = []
        payloads = obj.payloadContainer()
        payloads_sz = payloads.size()
        if hasattr(payloads, 'at'):
            # HACK for bug #77976
            _tmp = payloads
            payloads = [_tmp.at(ii) for ii in range(payloads_sz)]
        for ii,payload in zip(range(payloads_sz), payloads):
            if not payload:
                if writelog:
                    print ("**error** null-pointer ?", file=stdout)
                continue
            # names
            sz = payload.name_size()
            if writelog:
                print ('==names== (sz: %s)' % sz, file=stdout)
            chan_names = [payload.chanName(payload.chanNum(idx)) for idx in range(sz)]

            # iovs: we don't really care about those, they are only logged
            if writelog:
                sz = payload.iov_size()
                print ('==iovs== (sz: %s)' % sz, file=stdout)
                for idx in range(sz):
                    chan = payload.chanNum(idx)
                    iov_range = payload.iovRange(chan)
                    iov_start = iov_range.start()
                    iov_stop = iov_range.stop()
                    print ('(%s, %s) => (%s, %s) valid=%s runEvt=%s' % (
                        iov_start.run(),
                        iov_start.event(),
                        iov_stop.run(),
                        iov_stop.event(),
                        iov_start.isValid(),
                        iov_start.isRunEvent()),
                        file=stdout)

            # attrs
            attrs = [] # can't use a dict as spec.name() isn't unique
            sz = payload.size()
            if writelog:
                print ('==attrs== (sz: %s)' % sz, file=stdout)
            for idx in range(sz):
                chan = payload.chanNum(idx)
                attr_list = payload.attributeList(chan)
                attr_data = []
                for a in toiter(attr_list.begin(), attr_list.end()):
                    spec = a.specification()
                    a_type = spec.typeName()
                    if a_type.find('string') >= 0:
                        a_data = a.data['string']()
                        # NOTE(review): eval() runs on text read from the
                        # input file (builtins remain reachable even with
                        # empty namespaces); consider ast.literal_eval if
                        # files can come from untrusted sources.
                        try:
                            a_data = eval(a_data,{},{})
                        except Exception:
                            # swallow and keep as a string
                            pass
                    else:
                        a_data = a.data[a_type]()
                    attr_data.append( (spec.name(), a_data) )
                attrs.append(dict(attr_data))
            if len(attrs) == len(chan_names):
                data.append(dict(zip(chan_names,attrs)))
            elif len(attrs) == 1:
                data.append(attrs[0])
            elif len(attrs):
                data.append(attrs)
            else:
                data.append(chan_names)
        return data

    metadata = {}
    def maybe_get(o, idx, default=None):
        try:
            return o[idx]
        except IndexError:
            return default
    def maybe_float(o):
        # values that cannot be converted (e.g. 'N/A' or None) are returned as-is
        try:
            return float(o)
        except (ValueError, TypeError):
            return o

    # FIXME - read metadata_items from MetaDataHdr
    metadata_items = []

    for leaf in meta.GetListOfLeaves():
        clName = leaf.GetTypeName().split('_p')[0]
        flName = leaf.GetName()
        if leaf.GetTypeName() == 'IOVMetaDataContainer_p1':
            k = leaf.GetName()
            pers = getattr (meta, k)
            cnv.persToTrans(pers,obj)
            v = process_metadata(obj, k)
            flName = obj.folderName()
            # keep only the last payload of the folder (None if there is none)
            metadata[obj.folderName()] = maybe_get(v, -1)
        if flName[:11] in ['TriggerMenu','CutBookkeep','IncompleteC'] and clName[:6] != 'xAOD::':
            continue
        metadata_items.append((clName,flName))
        if clName == 'EventStreamInfo':
            metadata_items.append(('DataHeader', ';00;MetaDataSvc'))

    peeked_data['metadata'] = metadata
    peeked_data['metadata_items'] = metadata_items

    # try to make unique Stream Tags set (but not for eventless files)
    if '/TRIGGER/HLT/Menu' in metadata and peeked_data['nentries'] > 0:
        ti = set()
        for d in metadata.get('/TRIGGER/HLT/Menu'):
            ti.add(d['StreamInfo'])

        # build stream_tags from /TRIGGER/HLT/Menu metadata
        stream_tags = []
        for item in ti:
            i = item.split(',')
            if len(i)!=3:
                if writelog:
                    print('**error** Invalid StreamInfo entry:',item, file=stdout)
            else:
                # NOTE(review): bool() of any non-empty string is True, so
                # 'obeys_lbk' is only False when the third field is empty -
                # confirm this is the intended decoding.
                stream_tags.append( { 'obeys_lbk':bool(i[2]), 'stream_name':i[0], 'stream_type':i[1] } )

        peeked_data['stream_tags'] = stream_tags

    from PyUtils.Helpers import release_metadata
    rel_metadata = release_metadata()
    project = rel_metadata['project name']
    version = rel_metadata['release']
    release = project + '-' + version

    # -- tag info ------------------------------------------------------------
    taginfo = {}
    # the stored value is None when the folder had no payload (see maybe_get)
    if metadata.get('/TagInfo'):
        taginfo = metadata['/TagInfo'].copy()

    # but not for eventless files
    if peeked_data['nentries'] > 0:
        taginfo['AtlasRelease'] = release

    peeked_data['tag_info'] = taginfo
    if taginfo:
        peeked_data['det_descr_tags'] = taginfo
        peeked_data['geometry'] = taginfo.get('GeoAtlas', None)
        peeked_data['conditions_tag'] = taginfo.get('IOVDbGlobalTag', None)
        peeked_data['beam_type'] = [taginfo.get('beam_type','N/A')]
        peeked_data['beam_energy']= [maybe_float(taginfo.get('beam_energy',
                                                             'N/A'))]

    if 'evt_type' not in peeked_data: # must be eventless MC file
        if '/Simulation/Parameters' in metadata:
            peeked_data['evt_type'] = ['IS_SIMULATION', 'IS_ATLAS', 'IS_PHYSICS']
            peeked_data['run_number'] = [metadata['/Simulation/Parameters'].get('RunNumber','')]
        else:
            peeked_data['evt_type'] = []

    # make sure these keys always exist in the result
    peeked_data.setdefault('geometry', None)
    peeked_data.setdefault('conditions_tag', None)
    peeked_data.setdefault('det_descr_tags', {})

    if writelog:

        print (':::::: summary ::::::', file=stdout)
        print (' - nbr events: %s' % peeked_data['nentries'], file=stdout)
        print (' - run numbers: %s' % peeked_data['run_number'], file=stdout)
        print (' - lumiblocks: %s' % peeked_data['lumi_block'], file=stdout)
        print (' - evt types: ', peeked_data['evt_type'], file=stdout)
        print (' - item list: %s' % len(peeked_data['eventdata_items']), file=stdout)
        print (' - processing tags: %s' % peeked_data['stream_names'], file=stdout)
        print (' - geometry: %s' % peeked_data['geometry'], file=stdout)
        print (' - conditions tag: %s' % peeked_data['conditions_tag'], file=stdout)
        print (' - tag-info: %s' % peeked_data['tag_info'].keys(), file=stdout)
        # the caller closes the log file
        stdout.flush()

    return peeked_data
411 
def _save_db_cache(self):
    """Save self.peeked_data under the 'fileinfos' key of a new dbsqlite file.

    The file name comes from tempfile.mkstemp(suffix='.pkl').

    @return (status, filename): status is 0 when the cache file exists and
            self.peeked_data is non-empty; otherwise the cache file is
            removed (if present) and status is 1
    """
    import tempfile
    fd_pkl, out_pkl_fname = tempfile.mkstemp(suffix='.pkl')
    os.close(fd_pkl)
    oname = os.path.expanduser(os.path.expandvars(out_pkl_fname))
    # Only the unique name from mkstemp is wanted: drop the empty placeholder
    # before dbsqlite opens the path (presumably it must create the file
    # itself - TODO confirm).
    for stale in (out_pkl_fname, oname):
        if os.path.exists(stale):
            os.remove(stale)

    import PyUtils.dbsqlite as dbsqlite
    db = dbsqlite.open(oname, flags='w')
    try:
        db['fileinfos'] = self.peeked_data
    finally:
        # always release the database handle, even if the store fails
        db.close()

    if os.path.exists(oname) and len(self.peeked_data) > 0:
        return 0, out_pkl_fname

    # nothing useful was saved: do not leave a cache file behind, and do not
    # fail if it was never created
    if os.path.exists(oname):
        os.remove(oname)
    return 1, out_pkl_fname
python.Helpers.release_metadata
def release_metadata()
Definition: Tools/PyUtils/python/Helpers.py:143
python.FilePeekerTool.FilePeekerTool.__init__
def __init__(self, f_root)
Definition: FilePeekerTool.py:22
python.FilePeekerTool.FilePeekerTool.peeked_data
peeked_data
Definition: FilePeekerTool.py:24
python.FilePeekerLib.toiter
def toiter(beg, end)
Definition: FilePeekerLib.py:25
run_Egamma1_LArStrip_Fex.dump
dump
Definition: run_Egamma1_LArStrip_Fex.py:88
Get
T * Get(TFile &f, const std::string &n, const std::string &dir="", const chainmap_t *chainmap=0, std::vector< std::string > *saved=0)
get a histogram given a path, and an optional initial directory if histogram is not found,...
Definition: comparitor.cxx:178
python.FilePeekerTool.FilePeekerTool.getPeekedData
def getPeekedData(self, key, writelog=False)
Definition: FilePeekerTool.py:30
python.FilePeekerTool.FilePeekerTool._do_peeking
def _do_peeking(self, writelog)
Definition: FilePeekerTool.py:36
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
python.FilePeekerTool.FilePeekerTool._save_db_cache
def _save_db_cache(self)
Definition: FilePeekerTool.py:412
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
Trk::open
@ open
Definition: BinningType.h:40
python.FilePeekerTool.FilePeekerTool.run
def run(self, writelog=False)
Definition: FilePeekerTool.py:26
python.Bindings.bit_mask
bit_mask
Definition: Control/AthenaPython/python/Bindings.py:656
get
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition: hcg.cxx:127
python.Bindings.keys
keys
Definition: Control/AthenaPython/python/Bindings.py:798
dbg::print
void print(std::FILE *stream, std::format_string< Args... > fmt, Args &&... args)
Definition: SGImplSvc.cxx:70
python.FilePeekerTool.FilePeekerTool.f
f
Definition: FilePeekerTool.py:23
calibdata.copy
bool copy
Definition: calibdata.py:27
xAOD::bool
setBGCode setTAP setLVL2ErrorBits bool
Definition: TrigDecision_v1.cxx:60
python.FilePeekerTool.FilePeekerTool
Definition: FilePeekerTool.py:18
readCCLHist.float
float
Definition: readCCLHist.py:83
Trk::split
@ split
Definition: LayerMaterialProperties.h:38