8 __author__ =
"Alexandre Vaniachine <vaniachine@anl.gov>"
9 __doc__ =
"peek into APR files to read in-file metadata"
13 from CLIDComps.clidGenerator
import clidGenerator
14 cgen = clidGenerator(
"")
17 """utility algorithm to inspect a file's content
24 def run(self,writelog=False):
35 """ the real function doing all the work of peeking at the APR file
36 @return a dict of peeked-at data
42 'peeker-tool-%i-%s.log.txt' %
43 (os.getpid(), uuid.uuid4())
45 stdout =
open(stdout_fname,
"w")
46 print (
"Extracting in-file metadata without athena sub-process from file", self.
f.GetName(), file=stdout)
47 print (
"="*80, file=stdout)
50 pool = self.
f.
Get(
"##Params")
52 pool_token = re.compile(
r'[\[]NAME=(?P<name>.*?)[]]' r'[\[]VALUE=(?P<value>.*?)[]]').match
54 for i
in range(pool.GetEntries()):
55 if pool.GetEntry(i)>0:
56 match = pool_token(pool.FindLeaf(
"db_string").GetValueString())
60 params.append((d[
'name'], d[
'value']))
61 if d[
'name'].lower() ==
'fid':
64 meta = self.
f.
Get(
'MetaData' )
67 print (
'No metadata', file=stdout)
70 from AthenaPython.FilePeekerLib
import toiter
75 esiTypeName =
'EventStreamInfo'
76 for l
in meta.GetListOfLeaves():
77 if l.GetTypeName().startswith(esiTypeName):
78 esiTypeName = l.GetTypeName()
82 if esiTypeName !=
'EventStreamInfo_p3':
84 print (
"old schema is not supported:", esiTypeName, file=stdout)
89 esic = cppyy.gbl.EventStreamInfoPTCnv_p3()
90 esi = getattr (meta, esiName)
91 if esiName.startswith(esiTypeName):
92 esiName = esiName[len(esiTypeName)+1:]
97 peeked_data[
'file_guid'] = guid
99 peeked_data[
'run_type'] = [
'N/A']
102 print (peeked_data, file=stdout)
104 peeked_data[
'nentries'] = esic.getNumberOfEvents(esi)
105 peeked_data[
'lumi_block'] =
list(esic.lumiBlockNumbers(esi))
106 peeked_data[
'run_number'] =
list(esic.runNumbers(esi))
107 peeked_data[
'stream_names'] = [s
for s
in esic.processingTags(esi)]
109 item_list =
list( (cgen.getNameFromClid(i.first),
str(i.second))
for i
in esic.itemList(esi))
111 if (
'DataHeader', esiName)
in item_list:
112 item_list.remove((
'DataHeader', esiName))
113 item_list.append((
'DataHeader', esiName))
115 peeked_data[
'eventdata_items'] = item_list
117 etc = cppyy.gbl.EventTypePTCnv_p3()
119 raw_bit_mask=etc.bit_mask(self)
120 bit_mask_typecodes = [ (
'IS_DATA',
'IS_SIMULATION'), (
'IS_ATLAS',
'IS_TESTBEAM'), (
'IS_PHYSICS',
'IS_CALIBRATION'),]
def decode_bitmask(idx):
    """Return the type-code label for bit position ``idx``.

    Each entry of ``bit_mask_typecodes`` is a pair of labels; the value
    stored at ``raw_bit_mask[idx]`` selects which member of the pair is
    returned.  When ``raw_bit_mask`` has no entry at ``idx`` the first
    member of the pair is used.
    """
    if idx < len(raw_bit_mask):
        # the raw bit value doubles as the index into the label pair
        selector = raw_bit_mask[idx]
        return bit_mask_typecodes[idx][selector]
    # bit not present in the raw mask: fall back to the first label
    return bit_mask_typecodes[idx][0]
126 bm = map(decode_bitmask,
127 range(len(bit_mask_typecodes)))
130 def _get_detdescr_tags(evt_type):
131 ddt = evt_type.get_detdescr_tags().
split()
134 ddt = dict(zip(ddt[0::2],
138 etp=esic.eventTypes(esi)
142 peeked_data[
'evt_type'] =
bit_mask(et)
144 peeked_data[
'mc_channel_number'] = [et.m_mc_channel_number]
145 peeked_data[
'evt_number'] = [et.m_mc_event_number]
147 print (
'mc_event_weights.size:', et.m_mc_event_weights.size(), file=stdout)
148 print (
'mc_event_weights value', et.m_mc_event_weights[0], file=stdout)
149 print (
'user_type', et.m_user_type, file=stdout)
152 if peeked_data[
'nentries'] == 0:
153 peeked_data[
'stream_names'] = [esiName]
157 if len (peeked_data[
'run_number']) == 0
and meta.FindBranch(
'ByteStreamMetadata'):
158 bsmd = cppyy.gbl.ByteStreamMetadataPTCnv_p1()
159 peeked_data[
'lumi_block'] = [bsmd.getLumiBlock(meta.ByteStreamMetadata[0])]
160 peeked_data[
'run_number'] = [bsmd.getRunNumber(meta.ByteStreamMetadata[0])]
161 peeked_data[
'beam_type'] = [bsmd.getBeamType(meta.ByteStreamMetadata[0])]
162 peeked_data[
'beam_energy'] = [bsmd.getBeamEnergy(meta.ByteStreamMetadata[0])]
164 for md
in bsmd.getFreeMetaDataStrings(meta.ByteStreamMetadata[0]):
165 if md.startswith(
'Event type:'):
168 if 'is sim' in md: v.append(
'IS_SIMULATION')
169 else: v.append(
'IS_DATA')
170 if 'is atlas' in md: v.append(
'IS_ATLAS')
171 else: v.append(
'IS_TESTBEAM')
172 if 'is physics' in md: v.append(
'IS_PHYSICS')
173 else: v.append(
'IS_CALIBRATION')
174 bs_metadata[k] = tuple(v)
175 elif md.startswith(
'GeoAtlas:'):
177 v = md.split(
'GeoAtlas:')[1].strip()
179 elif md.startswith(
'IOVDbGlobalTag:'):
181 v = md.split(
'IOVDbGlobalTag:')[1].strip()
189 bs_metadata[
'Stream'] = bsmd.getStream(meta.ByteStreamMetadata[0])
190 bs_metadata[
'Project'] = bsmd.getProject(meta.ByteStreamMetadata[0])
191 if len(bs_metadata.get(
'evt_type',
'')) == 0 :
192 evt_type = [
'IS_DATA',
'IS_ATLAS']
193 if bs_metadata.get(
'Stream',
'').startswith(
'physics_'):
194 evt_type.append(
'IS_PHYSICS')
195 elif bs_metadata.get(
'Stream',
'').startswith(
'calibration_'):
196 evt_type.append(
'IS_CALIBRATION')
197 elif bs_metadata.get(
'Project',
'').endswith(
'_calib'):
198 evt_type.append(
'IS_CALIBRATION')
200 evt_type.append(
'Unknown')
201 bs_metadata[
'evt_type'] = evt_type
203 peeked_data[
'evt_type'] = bs_metadata.get(
'evt_type', [])
204 peeked_data[
'geometry'] = bs_metadata.get(
'geometry',
None)
205 peeked_data[
'conditions_tag'] = bs_metadata.get(
'conditions_tag',
None)
206 peeked_data[
'bs_metadata'] = bs_metadata
209 cnv = cppyy.gbl.IOVMetaDataContainerPTCnv_p1()
211 obj = cppyy.gbl.IOVMetaDataContainer()
213 def process_metadata(obj, metadata_name):
215 print (
'processing container [%s]' % obj.folderName(), file=stdout)
217 payloads = obj.payloadContainer()
218 payloads_sz = payloads.size()
219 if hasattr(payloads,
'at'):
223 for ii
in range(payloads_sz):
224 payloads.append(_tmp.at(ii))
226 for ii,payload
in zip(
range(payloads_sz), payloads):
229 print (
"**error** null-pointer ?", file=stdout)
233 sz = payload.name_size()
235 print (
'==names== (sz: %s)' % sz, file=stdout)
236 for idx
in range(sz):
237 chan = payload.chanNum(idx)
238 chan_name = payload.chanName(chan)
239 chan_names.append(chan_name)
243 sz = payload.iov_size()
245 print (
'==iovs== (sz: %s)' % sz, file=stdout)
246 for idx
in range(sz):
247 chan = payload.chanNum(idx)
248 iov_range = payload.iovRange(chan)
249 iov_start = iov_range.start()
250 iov_stop = iov_range.stop()
252 print (
'(%s, %s) => (%s, %s) valid=%s runEvt=%s' % (
258 iov_start.isRunEvent()),
265 print (
'==attrs== (sz: %s)' % sz, file=stdout)
266 for idx
in range(sz):
267 chan = payload.chanNum(idx)
268 attr_list = payload.attributeList(chan)
270 for a
in list(
toiter(attr_list.begin(), attr_list.end())):
271 spec = a.specification()
272 a_type = spec.typeName()
273 if a_type.find(
'string') >= 0:
274 a_data = a.data[
'string']()
276 a_data = eval(a_data,{},{})
281 a_data = a.data[a_type]()
282 attr_data.append( (spec.name(), a_data) )
283 attrs.append(dict(attr_data))
284 if len(attrs) == len(chan_names):
285 data.append(dict(zip(chan_names,attrs)))
289 data.append(attrs[0])
293 data.append(chan_names)
298 def maybe_get(o, idx, default=None):
312 for l
in meta.GetListOfLeaves():
313 clName = l.GetTypeName().
split(
'_p')[0]
315 if l.GetTypeName() ==
'IOVMetaDataContainer_p1':
317 pers = getattr (meta, k)
318 cnv.persToTrans(pers,obj)
321 obj.payloadContainer().at(0).
dump()
324 print (l.GetName(), file=stdout)
326 v = process_metadata(obj, k)
327 flName = obj.folderName()
328 metadata[obj.folderName()] = maybe_get(v, -1)
329 if flName[:11]
in [
'TriggerMenu',
'CutBookkeep',
'IncompleteC']
and clName[:6] !=
'xAOD::':
continue
330 metadata_items.append((clName,flName))
331 if clName ==
'EventStreamInfo':
332 metadata_items.append((
'DataHeader',
';00;MetaDataSvc'))
334 peeked_data[
'metadata'] = metadata
335 peeked_data[
'metadata_items'] = metadata_items
338 if '/TRIGGER/HLT/Menu' in metadata
and peeked_data[
'nentries'] > 0:
340 for d
in metadata.get(
'/TRIGGER/HLT/Menu'):
341 ti.add(d[
'StreamInfo'])
349 print(
'**error** Invalid StreamInfo entry:',item, file=stdout)
351 stream_tags.append( {
'obeys_lbk':
bool(i[2]),
'stream_name':i[0],
'stream_type':i[1] } )
353 peeked_data[
'stream_tags'] = stream_tags
355 from PyUtils.Helpers
import release_metadata
357 project = rel_metadata[
'project name']
358 version = rel_metadata[
'release']
359 release = project +
'-' + version
363 if '/TagInfo' in metadata:
364 taginfo = metadata[
'/TagInfo'].
copy()
367 if peeked_data[
'nentries'] > 0:
368 taginfo[
'AtlasRelease'] = release
370 peeked_data[
'tag_info'] = taginfo
372 peeked_data[
'det_descr_tags'] = taginfo
373 peeked_data[
'geometry'] = taginfo.get(
'GeoAtlas',
None)
374 peeked_data[
'conditions_tag'] = taginfo.get(
'IOVDbGlobalTag',
None)
375 peeked_data[
'beam_type'] = [taginfo.get(
'beam_type',
'N/A')]
376 peeked_data[
'beam_energy']= [maybe_float(taginfo.get(
'beam_energy',
379 if 'evt_type' not in peeked_data:
380 if '/Simulation/Parameters' in metadata:
381 peeked_data[
'evt_type'] = [
'IS_SIMULATION',
'IS_ATLAS',
'IS_PHYSICS']
382 peeked_data[
'run_number'] = [metadata[
'/Simulation/Parameters'].
get(
'RunNumber',
'')]
384 peeked_data[
'evt_type'] = []
# Fill in defaults for keys that were not populated earlier, so that
# later lookups (e.g. the summary printout) do not raise KeyError.
# Keys that are already present are left untouched.
peeked_data.setdefault('geometry', None)
peeked_data.setdefault('conditions_tag', None)
peeked_data.setdefault('det_descr_tags', {})
# Write a short recap of the peeked values to the log stream.
def _to_log(*fields):
    """Print ``fields`` to the ``stdout`` log stream (not sys.stdout)."""
    print(*fields, file=stdout)

_to_log(':::::: summary ::::::')
_to_log(' - nbr events: %s' % peeked_data['nentries'])
_to_log(' - run numbers: %s' % peeked_data['run_number'])
_to_log(' - lumiblocks: %s' % peeked_data['lumi_block'])
# two separate print arguments here, exactly as in the original call
_to_log(' - evt types: ', peeked_data['evt_type'])
# only the number of event-data items is reported, not the items themselves
_to_log(' - item list: %s' % len(peeked_data['eventdata_items']))
_to_log(' - processing tags: %s' % peeked_data['stream_names'])
_to_log(' - geometry: %s' % peeked_data['geometry'])
_to_log(' - conditions tag: %s' % peeked_data['conditions_tag'])
# only the tag-info keys are listed
_to_log(' - tag-info: %s' % peeked_data['tag_info'].keys())
411 """save file information using sqlite"""
413 fd_pkl,out_pkl_fname = tempfile.mkstemp(suffix=
'.pkl')
415 if os.path.exists(out_pkl_fname):
416 os.remove(out_pkl_fname)
417 oname = os.path.expanduser(os.path.expandvars(out_pkl_fname))
418 if os.path.exists(oname):
421 import PyUtils.dbsqlite
as dbsqlite
422 db = dbsqlite.open(oname,flags=
'w')
426 if os.path.exists(oname)
and len(self.
peeked_data) > 0:
427 return 0, out_pkl_fname
430 return 1, out_pkl_fname