ATLAS Offline Software
PoolFile.py
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 # @author: Sebastien Binet <binet@cern.ch>
4 # @date: March 2007
5 #
6 #
7 
8 __author__ = "Sebastien Binet <binet@cern.ch>"
9 
10 
11 __all__ = [
12  'PoolFileCatalog',
13  'PoolOpts',
14  'isRNTuple',
15  'PoolRecord',
16  'PoolFile',
17  'DiffFiles',
18  ]
19 
20 
21 import sys
22 import os
23 import shelve
24 
25 from dbm import whichdb
26 
27 from .Helpers import ShutUp
28 
29 
30 class Units (object):
31  kb = 1024.
32  Mb = 1024.*1024.
33 
34 
35 def isRNTuple(obj):
36  # MN: remove the "try" after migration to ROOT 6.34
37  try: from ROOT import RNTuple
38  except(ImportError): from ROOT.Experimental import RNTuple
39  return isinstance( obj, RNTuple )
40 
41 
42 class PoolFileCatalog(object):
43  """ Reverse-engineering of the POOL FileCatalog.
44  Allows one to retrieve the physical filename from a logical one, provided
45  that the file-id is known to the (real) PoolFileCatalog.
46  """
47  DefaultCatalog = "xmlcatalog_file:PoolFileCatalog.xml"
48  AllowedProtocols = (
49  # see: PoolSvc::createCatalog
50  # http://alxr.usatlas.bnl.gov/lxr/source/atlas/Database/AthenaPOOL/PoolSvc/src/PoolSvc.cxx?v=head#736
51  "xmlcatalog_file:", # POOL default
52  "apcfile:", # ATLAS_POOLCOND_PATH
53  "prfile:", # file via PathResolver
54  "file:", # simple file on local FS
55  )
56 
57  def __init__ (self, catalog=None):
58  super (PoolFileCatalog, self).__init__()
59  self.catalog = None
60 
61  if catalog is None:
62  # chase poolfilecatalog location
63  catalog = os.environ.get("POOL_CATALOG", self.DefaultCatalog)
64 
65  if isinstance(catalog, str):
66  catalog = [catalog]
67 
68  if not isinstance (catalog, (str, list)):
69  raise TypeError(
70  "catalog contact string should be a string or a list thereof! (got %r)"%
71  type(catalog))
72 
73  osp = os.path
74  def osp_exp(x):
75  return osp.expanduser(osp.expandvars(x))
76 
77  def _handle_apcfile_old(x):
78  """ return $ATLAS_POOLCOND_PATH/poolcond/x
79  """
80  if 'ATLAS_POOLCOND_PATH' not in os.environ:
81  return osp_exp(x)
82  pcp = os.environ["ATLAS_POOLCOND_PATH"]
83  if x.startswith("apcfile:"):
84  x = x[len("apcfile:"):]
85  return osp_exp(osp.join(pcp, 'poolcond', x))
86 
87  def _handle_apcfile(x):
88  """ return $ATLAS_POOLCOND_PATH/x
89  """
90  if 'ATLAS_POOLCOND_PATH' not in os.environ:
91  return osp_exp(x)
92  pcp = os.environ["ATLAS_POOLCOND_PATH"]
93  if x.startswith("apcfile:"):
94  x = x[len("apcfile:"):]
95  return osp_exp(osp.join(pcp, x))
96 
97  def _handle_xmlcatalog_file(x):
98  return osp_exp(x[len("xmlcatalog_file:"):])
99 
100  def _handle_prfile(x):
101  x = x[len("prfile:"):]
102  x = osp_exp(x)
103  try:
104  import AthenaCommon.Utils.unixtools as u
105  return u.FindFile(x,
106  os.environ['DATAPATH'].split(os.pathsep),
107  os.R_OK)
108  except ImportError:
109  return x
110 
111  def _handle_file(x):
112  x = x[len("file:"):]
113  x = osp_exp(x)
114  return x
115 
116  cat_dispatch = {
117  "xmlcatalog_file:": _handle_xmlcatalog_file,
118  "apcfile:": _handle_apcfile,
119  "prfile:": _handle_prfile,
120  "file:": _handle_file,
121  }
122  assert sorted(cat_dispatch.keys()) == sorted(self.AllowedProtocols), \
123  "catalog dispatch keys do not match AllowedProtocols:" \
124  "\n%s\n%s" % (sorted(cat_dispatch.keys()),
125  sorted(self.AllowedProtocols))
126 
127  from . import xmldict
128  def _build_catalog(catalog):
129  if not catalog.startswith(self.AllowedProtocols):
130  raise ValueError(
131  "sorry PoolFile:PoolFileCatalog only supports %s"
132  " as a protocol for the POOL file catalog (got: '%s')"
133  % (self.AllowedProtocols, catalog)
134  )
135  for protocol, handler in cat_dispatch.items():
136  if catalog.startswith(protocol):
137  catalog = handler(catalog)
138  break
139  # make sure the catalog exists...
140  import os
141 
142  if not os.path.exists (catalog):
143  return {}
144  # raise RuntimeError(
145  # 'could not find any PoolFileCatalog in [%s]' % catalog
146  # )
147 
148 
149  root = xmldict.ElementTree.parse (catalog).getroot()
150  return dict(xmldict.xml2dict(root))
151 
152  errors = []
153  cat = {'POOLFILECATALOG':{'File':[]}}
154  for c in catalog:
155  try:
156  bc = _build_catalog(c)
157  pc = bc.get('POOLFILECATALOG',{})
158  files = []
159  if pc:
160  files = pc.get('File',[])
161  if isinstance(files, dict):
162  files = [files]
163  cat['POOLFILECATALOG']['File'].extend(files)
164  except Exception as err:
165  errors.append(err)
166 
167  if errors:
168  raise errors[0] # FIXME : should we customize this a bit ?
169 
170  self.catalog = cat
171  pass
172 
173  def pfn (self, url_or_fid):
174  """find the physical file name given a url or a file-id"""
175  import os.path as osp
176  url_or_fid = osp.expanduser(osp.expandvars(url_or_fid))
177 
178  if isinstance (url_or_fid, list):
179  return [self._pfn(f) for f in url_or_fid]
180  else:
181  return self._pfn(url_or_fid)
182 
183  def _pfn (self, url_or_fid):
184  """find the physical file name given a url or a file-id"""
185  if not ('POOLFILECATALOG' in self.catalog):
186  return None
187  if not ('File' in self.catalog['POOLFILECATALOG']):
188  return None
189 
190  PFN_IDX = 0 # take this pfn when alternates exist
191 
192  files = self.catalog['POOLFILECATALOG']['File']
193  if isinstance(files, dict):
194  # in case there was only one entry in the catalog
195  files = [files]
196  import re
197  if url_or_fid.lower().startswith('fid:'):
198  url_or_fid = url_or_fid[len('fid:'):]
199  if re.compile (r'\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$').match (url_or_fid):
200  fid = url_or_fid.lower()
201  # better to check consistency of catalog over all entries
202  # than declare success on first match...
203  match = {}
204  for f in files:
205  if f.ID.lower() == fid:
206  match[fid] = []
207  pfn = f.physical.pfn
208  if isinstance(pfn, (list,tuple)):
209  match[fid].append([i.name for i in pfn])
210  else:
211  match[fid].append([pfn.name])
212  if len(match[fid])==1:
213  return match[fid][0][PFN_IDX]
214  if len(match[fid])>1:
215  raise LookupError (
216  "more than one match for FID='%s'!\n%r"%(fid,match)
217  )
218  raise KeyError ("no entry with FID='%s' in catalog" % fid)
219  else:
220  url = url_or_fid
221  if url.lower().startswith("lfn:"):
222  url = url[len("lfn:"):]
223  # better to check consistency of catalog over all entries
224  # than declare success on first match...
225  match = {}
226  for f in files:
227  if (f.logical != '' # no LFN for this entry
228  and f.logical.lfn.name == url):
229  match[url] = []
230  pfn = f.physical.pfn
231  if isinstance(pfn, (list,tuple)):
232  match[url].append([i.name for i in pfn])
233  else:
234  match[url].append([pfn.name])
235  if len(match[url])==1:
236  return match[url][0][PFN_IDX]
237  if len(match[url])>1:
238  raise LookupError (
239  "more than one match for LFN='%s'!\n%r"%(url,match)
240  )
241  raise KeyError ("no entry with LFN='%s' in catalog" % url)
242  # assume that if not LFN: then PFN:, no matter what...
243  if url.lower().startswith("pfn:"):
244  url = url[len("pfn:"):]
245  return url
246 
247  def __call__ (self, url_or_fid):
248  return self.pfn (url_or_fid)
249 
250  pass
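
A minimal usage sketch of the catalog wrapper above (editor's illustration: the
logical file name is hypothetical and must be present in the XML catalog,
otherwise pfn() raises a KeyError):

    from PyUtils.PoolFile import PoolFileCatalog
    catalog = PoolFileCatalog()              # uses $POOL_CATALOG or PoolFileCatalog.xml
    pfn = catalog("LFN:my.logical.name")     # __call__ forwards to pfn()
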
251 
252 class PoolOpts(object):
253  # default names of APR file storage elements
254  # copied here from RootUtils/APRDefaults.h for performance (as the first dictionary access takes 7 sec)
255  # see ATEAM-973 for a more detailed discussion
256  # the definitions here should be kept in sync with those!
257  class TTreeNames:
258  EventData = "CollectionTree"
259  EventTag = "POOLCollectionTree"
260  DataHeader = "POOLContainer"
261  MetaData = "MetaData"
262  class RNTupleNames:
263  EventData = "EventData"
264  EventTag = "EventTag"
265  DataHeader = "DataHeader"
266  MetaData = "MetaData"
267 
268  FAST_MODE = False
269  SUPER_DETAILED_BRANCH_SZ = False
270  READ_MODE = "READ"
271  POOL_HEADER = TTreeNames.DataHeader
272  EVENT_DATA = TTreeNames.EventData
273  META_DATA = TTreeNames.MetaData
274  HDR_FORMAT = " %11s %11s %11s %11s %5s %s"
275  ROW_FORMAT = "%12.3f kb %12.3f kb %12.3f kb %12.3f %8i %s"
276 
277  @classmethod
278  def isData(cls, name):
279  return not name.startswith("##") and not cls.isDataHeader(name)
280 
281  @classmethod
282  def isDataHeader(cls, name):
283  return name in {cls.TTreeNames.DataHeader
284  , cls.TTreeNames.DataHeader+"_DataHeader"
285  , cls.RNTupleNames.DataHeader}
286 
287  @classmethod
288  def isEventData(cls, name):
289  return name.startswith(PoolOpts.EVENT_DATA)
290 
291  @classmethod
292  def isAugmentation(cls, name):
293  return "_DAOD_" in name
294 
295  @classmethod
296  def augmentationName(cls, name):
297  s = (name+"__").split('_')[2]
298  if s.endswith("Form"):
299  s = s[:-4]
300  return s
301 
302  @classmethod
303  def isAugmentedHeader(cls, name):
304  return name.startswith(PoolOpts.POOL_HEADER) and cls.isAugmentation(name)
305 
306  pass # class PoolOpts
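
The naming helpers of PoolOpts can be used on their own; a small sketch (editor's
illustration, using the default container names defined in TTreeNames above):

    from PyUtils.PoolFile import PoolOpts
    PoolOpts.isDataHeader("POOLContainer")                 # True
    PoolOpts.isAugmentation("POOLContainer_DAOD_PHYS")     # True
    PoolOpts.augmentationName("POOLContainer_DAOD_PHYS")   # 'PHYS'
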
307 
308 def _get_total_size (branch):
309  if PoolOpts.FAST_MODE:
310  return -1.
311  if not PoolOpts.SUPER_DETAILED_BRANCH_SZ:
312  return branch.GetTotalSize()
313  brSize = 0
314  branch.LoadBaskets()
315  for bnum in range(0, branch.GetWriteBasket()):
316  basket = branch.GetBasket(bnum)
317  brSize += basket.GetObjlen() - 8
318  return brSize
319 
320 def file_name(fname):
321  """take a file name, return the pair (protocol, 'real' file name)
322  """
323  fname = os.path.expanduser(os.path.expandvars(fname))
324 
325  def _normalize_uri(uri):
326  if uri.startswith('/'):
327  return 'file:'+uri
328  return uri
329 
330  from urllib.parse import urlsplit
331  url = urlsplit(_normalize_uri(fname))
332  protocol = url.scheme
333  def _normalize(fname):
334  from posixpath import normpath
335  fname = normpath(fname)
336  if fname.startswith('//'): fname = fname[1:]
337  return fname
338 
339  if protocol in ('', 'file', 'pfn'):
340  protocol = ''
341  fname = _normalize(url.path)
342 
343 
344  if fname.startswith('/castor/'):
345  protocol = 'rfio'
346  fname = protocol + ':' + fname
347 
348  elif protocol in ('rfio', 'castor'):
349  protocol = 'rfio'
350  fname = _normalize(url.path)
351  fname = protocol+':'+fname
352 
353  elif protocol in ('root','dcap', 'dcache', 'http', 'https', 'dav', 'davs'):
354  pass
355 
356  elif protocol in ('gsidcap',):
357  protocol = 'gfal:gsidcap'
358  pass
359 
360  elif protocol in ('lfn','fid',):
361  # percolate through the PoolFileCatalog
362  from PyUtils.PoolFile import PoolFileCatalog as pfc
363  fname = pfc().pfn(protocol+':'+url.path)
364  pass
365 
366  elif protocol in ('ami',):
367  # !! keep order of tokens !
368  for token in ('ami:', '//', '/'):
369  if fname.startswith(token):
370  fname = fname[len(token):]
371  fname = 'ami://' + fname
372  pass
373 
374  else:
375  print(f'## warning: unknown protocol [{protocol}]. we will just return our input')
376  pass
377 
378  return (protocol, fname)
379 
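
For illustration, file_name() returns the (protocol, 'real' file name) pair; a
short sketch with hypothetical paths:

    from PyUtils.PoolFile import file_name
    file_name('/tmp/data.pool.root')             # -> ('', '/tmp/data.pool.root')
    file_name('root://server//eos/f.pool.root')  # -> ('root', 'root://server//eos/f.pool.root')
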
380 def _setup_ssl(root):
381  x509_proxy = os.environ.get('X509_USER_PROXY', '')
382  if x509_proxy:
383  # setup proper credentials
384  root.TSSLSocket.SetUpSSL(
385  x509_proxy,
386  "/etc/grid-security/certificates",
387  x509_proxy,
388  x509_proxy)
389  else:
390  print("## warning: protocol https is requested but no X509_USER_PROXY was found! (opening the file might fail.)")
391  pass
392  return
393 
394 def _root_open(fname):
395  import PyUtils.RootUtils as ru
396  root = ru.import_root()
397  import re
398 
399  with ShutUp(filters=[
400  re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for class.*') ]):
401  root.gSystem.Load('libRootCollection')
402  root_open = root.TFile.Open
403 
404  # we need to get back the protocol b/c of the special
405  # case of secure-http which needs to open TFiles as TWebFiles...
406  protocol, _ = file_name(fname)
407  if protocol == 'https':
408  _setup_ssl(root)
409  root_open = root.TWebFile.Open
410 
411  f = root_open(fname, 'READ')
412  if f is None or not f:
413  import errno
414  raise IOError(errno.ENOENT,
415  'No such file or directory',fname)
416  return f
417  return
418 
419 def retrieveBranchInfos( branch, poolRecord, ident = "" ):
420  fmt = "%s %3i %8.3f %8.3f %8.3f %s"
421  if 0:
422  out = fmt % ( ident,
423  branch.GetListOfBranches().GetSize(),
424  _get_total_size (branch),
425  branch.GetTotBytes(),
426  branch.GetZipBytes(),
427  branch.GetName() )
428  print(out)
429 
430  branches = branch.GetListOfBranches()
431  for b in branches:
432  poolRecord.memSize += _get_total_size (b) / Units.kb
433  if (b.GetZipBytes() < 0.001):
434  poolRecord.memSizeNoZip += _get_total_size (b) / Units.kb
435  poolRecord.diskSize += b.GetZipBytes() / Units.kb
436  poolRecord = retrieveBranchInfos ( b, poolRecord, ident+" " )
437 
438  return poolRecord
439 
440 def make_pool_record (branch, dirType):
441  memSize = _get_total_size (branch) / Units.kb
442  zipBytes = branch.GetZipBytes()
443  memSizeNoZip = memSize if zipBytes < 0.001 else 0.
444  diskSize = branch.GetZipBytes() / Units.kb
445  typeName = branch.GetClassName()
446  if not typeName and (leaf := branch.GetListOfLeaves().At(0)):
447  typeName = leaf.GetTypeName()
448  return PoolRecord(branch.GetName(), memSize, diskSize, memSizeNoZip,
449  branch.GetEntries(),
450  dirType=dirType,
451  typeName=typeName)
452 
452 
453 class PoolRecord(object):
454  """Statistics (memory size, disk size, number of entries, ...) for one POOL container.
455  """
456  class Sorter:
457  DiskSize = "diskSize"
458  MemSize = "memSize"
459  ContainerName = "name"
460 
461  @staticmethod
462  def allowedValues():
463  return [ PoolRecord.Sorter.DiskSize,
464  PoolRecord.Sorter.MemSize,
465  PoolRecord.Sorter.ContainerName ]
466  pass
467  def __init__(self, name, memSize, diskSize, memSizeNoZip, nEntries, dirType,
468  detailedInfos = "", typeName = None):
469  """Initialize PoolRecord instance.
470 
471  dirType first letter of object type name that may distinguish the types:
472  "T" for TTree, "B" for TBranch,
473  "N" for RNTuple, "F" for RField
474  """
475  object.__init__(self)
476  self.name = name
477  self.memSize = memSize
478  self.diskSize = diskSize
479  self.memSizeNoZip = memSizeNoZip
480  self.nEntries = nEntries
481  self.dirType = dirType
482  self.details = detailedInfos
483  self.augName = ''
484  self.typeName = typeName
485  return
486 
487 class PoolFile(object):
488  """
489  A simple class to retrieve information about the content of a POOL file.
490  It should be abstracted from the underlying technology used to create this
491  POOL file (Db, ROOT,...).
492  Right now, we are using the easy and lousy solution: going straight to the
493  ROOT 'API'.
494  """
495 
496  def __init__(self, fileName, verbose=True):
497  object.__init__(self)
498 
499  self._fileInfos = None
500  self.keys = None
501  self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
502  nEntries = 0,
503  dirType = "T")
504  self.augNames = set()
505  self.dataHeaderA = {}
506  self.data = []
507  self.verbose = verbose
508 
509  # get the "final" file name (handles all kinds of protocols)
510  try:
511  protocol, fileName = file_name(fileName)
512  except Exception as err:
513  print("## warning: problem opening PoolFileCatalog:\n%s"%err)
514  import traceback
515  traceback.print_exc()
516  pass
517 
518  self.poolFile = None
519  dbFileName = whichdb( fileName )
520  if dbFileName not in ( None, '' ):
521  if self.verbose is True:
522  print("## opening file [%s]..." % str(fileName))
523  db = shelve.open( fileName, 'r' )
524  if self.verbose is True:
525  print("## opening file [OK]")
526  report = db['report']
527  self._fileInfos = report['fileInfos']
528  self.dataHeader = report['dataHeader']
529  self.data = report['data']
530  else:
531  if self.verbose is True:
532  print("## opening file [%s]..." % str(fileName))
533  self.__openPoolFile( fileName )
534  if self.verbose is True:
535  print("## opening file [OK]")
536  self.__processFile()
537 
538  return
539 
540  def __openPoolFile(self, fileName):
541  # hack to prevent ROOT from loading graphic libraries and hence bothering
542  # our fellow Mac users
543  if self.verbose is True:
544  print("## importing ROOT...")
545  import PyUtils.RootUtils as ru
546  ROOT = ru.import_root()
547  self.ROOT = ROOT
548  if self.verbose is True:
549  print("## importing ROOT... [DONE]")
550  # prevent ROOT from being too verbose
551  rootMsg = ShutUp()
552  rootMsg.mute()
553  ROOT.gErrorIgnoreLevel = ROOT.kFatal
554 
555  poolFile = None
556  try:
557  poolFile = ROOT.TFile.Open( fileName, PoolOpts.READ_MODE )
558  except Exception as e:
559  rootMsg.unMute()
560  print("## Failed to open file [%s] !!" % fileName)
561  print("## Reason:")
562  print(e)
563  print("## Bailing out...")
564  raise IOError("Could not open file [%s]" % fileName)
565 
566  rootMsg.unMute()
567 
568  if poolFile is None:
569  print("## Failed to open file [%s] !!" % fileName)
570  msg = "Could not open file [%s]" % fileName
571  raise IOError(msg)
572 
573  self.poolFile = poolFile
574  assert self.poolFile.IsOpen() and not self.poolFile.IsZombie(), \
575  "Invalid POOL file or a Zombie one"
576  self._fileInfos = {
577  'name' : self.poolFile.GetName(),
578  'size' : self.poolFile.GetSize(),
579  }
580  return
581 
582  def __processFile(self):
583 
584  for name in {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}:
585  dhKey = self.poolFile.FindKey( name )
586  if dhKey:
587  obj = self.poolFile.Get( name )
588  if isinstance(obj, self.ROOT.TTree):
589  nEntries = obj.GetEntries()
590  elif isRNTuple(obj):
591  try:
592  nEntries = self.ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
593  except AttributeError:
594  # ROOT 6.36 and later
595  nEntries = self.ROOT.RNTupleReader.Open(obj).GetNEntries()
596  else:
597  raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
598  break
599  else:
600  nEntries = 0
601 
602  keys = []
603  containers = []
604  for k in self.poolFile.GetListOfKeys():
605  keyname = k.GetName()
606  obj = self.poolFile.Get( keyname )
607  if isinstance(obj, self.ROOT.TTree):
608  containerName = obj.GetName()
609  nEntries = obj.GetEntries()
610  dirType = "T"
611  elif isRNTuple(obj):
612  try:
613  reader = self.ROOT.Experimental.RNTupleReader.Open(obj)
614  except AttributeError:
615  # ROOT 6.36 and later
616  reader = self.ROOT.RNTupleReader.Open(obj)
617  containerName = reader.GetDescriptor().GetName()
618  nEntries = reader.GetNEntries()
619  dirType = "N"
620  else:
621  raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
622  if containerName not in containers:
623  keys.append(k)
624  containers.append(containerName)
625  pass
626  if keyname.startswith(PoolOpts.POOL_HEADER) and not keyname.endswith('Form'):
627  self.dataHeaderA[PoolOpts.augmentationName(keyname)] = \
628  PoolRecord("DataHeader", 0, 0, 0,
629  nEntries = nEntries,
630  dirType = dirType)
631 
632  keys.sort (key = lambda x: x.GetName())
633  self.keys = keys
634  del containers
635 
636  for k in keys:
637  obj = self.poolFile.Get( k.GetName() )
638  if isinstance(obj, self.ROOT.TTree):
639  name = obj.GetName()
640  elif isRNTuple(obj):
641  try:
642  inspector = self.ROOT.Experimental.RNTupleInspector.Create(obj)
643  except AttributeError:
644  inspector = self.ROOT.RNTupleInspector.Create(obj)
645  name = inspector.GetDescriptor().GetName()
646 
647  if PoolOpts.isDataHeader(name):
648  contName = "DataHeader"
649  if isinstance(obj, self.ROOT.TTree):
650  memSize = obj.GetTotBytes() / Units.kb
651  diskSize = obj.GetZipBytes() / Units.kb
652  memSizeNoZip = 0.0
653  if diskSize < 0.001:
654  memSizeNoZip = memSize
655  nEntries = obj.GetEntries()
656 
657  # try to also handle non-T/P separated DataHeaders (from old files)...
658  dhBranchNames = [
659  br.GetName() for br in obj.GetListOfBranches()
660  if br.GetName().count("DataHeader_p") > 0
661  ]
662  if len(dhBranchNames) == 1:
663  dhBranch = obj.GetBranch(dhBranchNames[0])
664  typeName = dhBranch.GetClassName()
665  if not typeName and (leaf := dhBranch.GetListOfLeaves().At(0)):
666  typeName = leaf.GetTypeName()
667  poolRecord = retrieveBranchInfos(
668  dhBranch,
669  PoolRecord( contName, 0., 0., 0.,
670  nEntries,
671  dirType = "T",
672  typeName = typeName ),
673  ident = " "
674  )
675  else:
676  poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
677  nEntries,
678  dirType = "T")
679 
680  self.dataHeader = poolRecord
681  elif isRNTuple(obj):
682  diskSize = inspector.GetCompressedSize() / Units.kb
683  memSize = inspector.GetUncompressedSize() / Units.kb
684 
685  memSizeNoZip = 0.0
686  if diskSize < 0.001:
687  memSizeNoZip = memSize
688  nEntries = inspector.GetDescriptor().GetNEntries()
689  poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
690  nEntries,
691  dirType = "N")
692  self.dataHeader = poolRecord
693  elif PoolOpts.isData(name):
694  if isinstance(obj, self.ROOT.TTree):
695  if not hasattr(obj, 'GetListOfBranches'):
696  continue
697  branches = obj.GetListOfBranches()
698  dirType = "T"
699  if name in (PoolOpts.EVENT_DATA, PoolOpts.META_DATA):
700  dirType = "B"
701  for branch in branches:
702  poolRecord = retrieveBranchInfos(
703  branch,
704  make_pool_record(branch, dirType),
705  ident = " "
706  )
707  poolRecord.augName = PoolOpts.augmentationName(name)
708  self.augNames.add(poolRecord.augName)
709  self.data += [ poolRecord ]
710  elif isRNTuple(obj):
711  descriptor = inspector.GetDescriptor()
712  dirType = "N"
713  if name in {PoolOpts.RNTupleNames.EventData, PoolOpts.RNTupleNames.MetaData}:
714  dirType = "F"
715  fieldZeroId = descriptor.GetFieldZeroId()
716  for fieldDescriptor in descriptor.GetFieldIterable(fieldZeroId):
717  fieldId = fieldDescriptor.GetId()
718  fieldTreeInspector = inspector.GetFieldTreeInspector(fieldId)
719  diskSize = fieldTreeInspector.GetCompressedSize() / Units.kb
720  memSize = fieldTreeInspector.GetUncompressedSize() / Units.kb
721  typeName = fieldDescriptor.GetTypeName()
722  fieldName = fieldDescriptor.GetFieldName()
723  poolRecord = PoolRecord(fieldName, memSize, diskSize, memSize,
724  descriptor.GetNEntries(),
725  dirType=dirType,
726  typeName=typeName)
727  poolRecord.augName = PoolOpts.augmentationName(name)
728  self.augNames.add(poolRecord.augName)
729  self.data += [ poolRecord ]
730  # loop over keys
731 
732  return
733 
734  def fileInfos(self):
735  return os.linesep.join( [
736  "File:" + self._fileInfos['name'],
737  "Size: %12.3f kb" % (self._fileInfos['size'] / Units.kb),
738  "Nbr Events: %i" % self.dataHeader.nEntries
739  ] )
740 
741 
742  def checkFile(self, sorting = PoolRecord.Sorter.DiskSize):
743  if self.verbose is True:
744  print(self.fileInfos())
745  if len(self.augNames) > 1:
746  for aug in self.augNames:
747  if len(aug) > 0:
748  print( "Nbr %s Events: %i" % (aug, self.dataHeaderA[aug].nEntries) )
749 
750 
751  data = self.data
752  if sorting in PoolRecord.Sorter.allowedValues():
753  import operator
754  data.sort(key = operator.attrgetter(sorting) )
755 
756  def _get_val(x, dflt=-999.):
757  if PoolOpts.FAST_MODE:
758  return dflt
759  return x
760 
761  totMemSize = _get_val(self.dataHeader.memSize, dflt=0.)
762  totDiskSize = self.dataHeader.diskSize
763 
764  def _safe_div(num,den):
765  if float(den) == 0.:
766  return 0.
767  return num/den
768 
769  if self.verbose is True:
770  print("")
771  print("="*80)
772  print(PoolOpts.HDR_FORMAT % ( "Mem Size", "Disk Size","Size/Evt",
773  "MissZip/Mem","items",
774  "(X) Container Name (X=Tree|Branch)" ))
775  print("="*80)
776 
777  print(PoolOpts.ROW_FORMAT % (
778  _get_val (self.dataHeader.memSize),
779  self.dataHeader.diskSize,
780  _safe_div(self.dataHeader.diskSize,float(self.dataHeader.nEntries)),
781  _get_val (_safe_div(self.dataHeader.memSizeNoZip,
782  self.dataHeader.memSize)),
783  self.dataHeader.nEntries,
784  "("+self.dataHeader.dirType+") "+self.dataHeader.name
785  ))
786  print("-"*80)
787 
788  totMemSizeA = {}
789  totDiskSizeA = {}
790  for d in data:
791  totMemSize += 0. if PoolOpts.FAST_MODE else d.memSize
792  totDiskSize += d.diskSize
793  memSizeNoZip = d.memSizeNoZip/d.memSize if d.memSize != 0. else 0.
794  aug = d.augName
795  totMemSizeA[aug] = totMemSizeA.get(aug,0.) + d.memSize
796  totDiskSizeA[aug] = totDiskSizeA.get(aug,0.) + d.diskSize
797  if self.verbose is True:
798  print(PoolOpts.ROW_FORMAT % (
799  _get_val (d.memSize),
800  d.diskSize,
801  _safe_div(d.diskSize, float(self.dataHeader.nEntries)),
802  _get_val (memSizeNoZip),
803  d.nEntries,
804  "("+d.dirType+") "+d.name
805  ))
806 
807  if self.verbose is True:
808  print("="*80)
809  if len(self.augNames) > 1:
810  augs = sorted(self.augNames)
811  for a in augs:
812  print(PoolOpts.ROW_FORMAT % (
813  totMemSizeA[a], totDiskSizeA[a],
814  _safe_div(totDiskSizeA[a], float(self.dataHeaderA[a].nEntries)),
815  0.0,
816  self.dataHeaderA[a].nEntries,
817  "Aug Stream: " + ('MAIN' if a=='' else a)
818  ))
819  print("-"*80)
820  print(PoolOpts.ROW_FORMAT % (
821  totMemSize, totDiskSize,
822  _safe_div(totDiskSize, float(self.dataHeader.nEntries)),
823  0.0, self.dataHeader.nEntries,
824  "TOTAL (POOL containers)"
825  ))
826  print("="*80)
827  if PoolOpts.FAST_MODE:
828  print("::: warning: FAST_MODE was enabled: some columns' content ",)
829  print("is meaningless...")
830  return
831 
832  def detailedDump(self, bufferName = None ):
833  if self.poolFile is None or \
834  self.keys is None:
835  print("Can't perform a detailedDump with a shelve file as input !")
836  return
837 
838  if bufferName is None:
839  bufferName = "/dev/stdout"
840  out = open( bufferName, "w" )
841  sys.stdout.flush()
842  save_stdout_fileno = os.dup (sys.stdout.fileno())
843  os.dup2( out.fileno(), sys.stdout.fileno() )
844 
845  out.write( "#" * 80 + os.linesep )
846  out.write( "## detailed dump" + os.linesep )
847  out.flush()
848 
849  for key in self.keys:
850  tree = key.ReadObj()
851  name = tree.GetName()
852 
853  if PoolOpts.isDataHeader(name) or \
854  PoolOpts.isData(name):
855  try:
856  print ("=== [%s] ===" % name, file=sys.stderr)
857  tree.Print()
858  except Exception as err:
859  print ("Caught:",err, file=sys.stderr)
860  print (sys.exc_info()[0], file=sys.stderr)
861  print (sys.exc_info()[1], file=sys.stderr)
862  pass
863  pass
864  pass
865  out.write( "#" * 80 + os.linesep )
866  out.flush()
867  out.write( "#" * 80 + os.linesep )
868 
872  out.flush()
873  if bufferName != "<stdout>":
874  out.close()
875  sys.stdout.close()
876  sys.stdout = open (save_stdout_fileno, 'a')
877  return
878 
879  def poolRecord(self, name):
880  """
881  Return a PoolRecord according to its (branch) name
882  Raise KeyError if no match is found
883  """
884  for data in self.data:
885  if data.name == name:
886  return data
887  raise KeyError("No PoolRecord with name [%s]" % name)
888 
889  def saveReport (self, fileName):
890  """
891  Save all the gathered information into a python shelve or a CSV file
892  (depending on the @param `fileName` extension)
893  """
894  import os
895  if os.path.splitext(fileName)[-1] == '.csv':
896  return self._save_csv_report (fileName)
897  return self._save_shelve_report (fileName)
898 
899  def _save_shelve_report(self, fileName):
900  """
901  Save all the gathered information into a python shelve
902  Data can then be read like so:
903  >>> import shelve
904  >>> db = shelve.open( 'myfile.dat', 'r' )
905  >>> report = db['report']
906  >>> print ('fileInfos:',report['fileInfos'])
907  >>> print ('dataHeader/memSize:',report['dataHeader'].memSize)
908  >>> for d in report['data']:
909  ... print ('data:',d.name,d.nEntries,d.memSize)
910  """
911  import shelve, os
912  if os.path.exists (fileName):
913  os.unlink (fileName)
914  db = shelve.open (fileName)
915  db['report'] = {
916  'fileInfos' : self._fileInfos,
917  'nbrEvts' : self.dataHeader.nEntries,
918  'dataHeader' : self.dataHeader,
919  'data' : self.data
920  }
921  db.close()
922  return
923 
924  def _save_csv_report(self, fileName):
925  """
926  Save all the gathered information into a CSV file
927  """
928  import csv, os
929  if os.path.exists (fileName):
930  os.unlink (fileName)
931  args = {'newline' : ''}
932  f = open (fileName, 'w', **args)
933  o = csv.writer (f)
934  o.writerow (['file name', self._fileInfos['name']])
935  o.writerow (['file size', self._fileInfos['size']])
936  o.writerow (['nbr evts', self.dataHeader.nEntries])
937  o.writerow (['mem size', 'disk size', 'mem size nozip', 'items',
938  'container name', 'branch type'])
939 
940  for d in self.data:
941  o.writerow ([d.memSize, d.diskSize, d.memSizeNoZip,
942  d.nEntries, d.name, d.dirType])
943  f.close()
944  return
945 
946  def __del__(self):
947  if self.poolFile and hasattr(self.poolFile, 'Close'):
948  try:
949  self.poolFile.Close()
950  self.poolFile = None
951  except Exception as err:
952  print("WARNING:",err)
953  pass
954 
955  pass # class PoolFile
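
A typical use of the class above, as a minimal sketch (the file name is
hypothetical; checkFile() prints the per-container size table and saveReport()
writes a shelve or CSV report depending on the extension):

    import PyUtils.PoolFile as PF
    pool_file = PF.PoolFile("AOD.pool.root")
    pool_file.checkFile(sorting=PF.PoolRecord.Sorter.DiskSize)
    pool_file.saveReport("AOD.size.csv")
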
956 
957 class DiffFiles(object):
958  """
959  A helper class to compare 2 POOL files and check that they match, both in
960  terms of containers' content and containers' sizes
961  """
962 
963  def __init__(self, refFileName, chkFileName, verbose = False, ignoreList = None, strict = False):
964  object.__init__(self)
965 
966  self.verbose = verbose
967  self.strict = strict
968  refFileName = os.path.expandvars( os.path.expanduser( refFileName ) )
969  chkFileName = os.path.expandvars( os.path.expanduser( chkFileName ) )
970 
971  if ignoreList is None:
972  ignoreList = []
973 
974  try:
975  self.refFile = PoolFile( refFileName )
976  self.chkFile = PoolFile( chkFileName )
977  self.ignList = sorted( ignoreList )
978  except Exception as err:
979  print("## Caught exception [%s] !!" % str(err.__class__))
980  print("## What:",err)
981  print(sys.exc_info()[0])
982  print(sys.exc_info()[1])
983  err = "Error while opening POOL files !"
984  err += " chk : %s%s" % ( chkFileName, os.linesep )
985  err += " ref : %s%s" % ( refFileName, os.linesep )
986  raise Exception(err)
987 
988  self.allGood = True
989  self.summary = []
990 
991  self.__checkDiff()
992  return
993 
994  def __checkDiff(self):
995 
996  self.summary += [
997  "=" * 80,
998  "::: Comparing POOL files...",
999  " ref : %s" % self.refFile._fileInfos['name'],
1000  " chk : %s" % self.chkFile._fileInfos['name'],
1001  "-" * 80,
1002  ]
1003 
1004  if self.chkFile.dataHeader.nEntries != \
1005  self.refFile.dataHeader.nEntries :
1006  self.summary += [
1007  "## WARNING: files don't have the same number of entries !!",
1008  " ref : %r" % self.refFile.dataHeader.nEntries,
1009  " chk : %r" % self.chkFile.dataHeader.nEntries,
1010  ]
1011 
1012  refNames = sorted( [d.name for d in self.refFile.data] )
1013  chkNames = sorted( [d.name for d in self.chkFile.data] )
1014 
1015  if chkNames != refNames:
1016  self.summary += [
1017  "## ERROR: files don't have the same content !!",
1018  ]
1019  addNames = [ n for n in chkNames if n not in refNames ]
1020  if len( addNames ) > 0:
1021  self.summary += [ "## collections in 'chk' and not in 'ref'" ]
1022  for n in addNames:
1023  self.summary += [ " + %s" % n ]
1024  subNames = [ n for n in refNames if n not in chkNames ]
1025  if len( subNames ) > 0:
1026  self.summary += [ "## collections in 'ref' and not in 'chk'" ]
1027  for n in subNames:
1028  self.summary += [ " - %s" % n ]
1029  self.allGood = False
1030  pass
1031 
1032  if len(self.ignList) > 0:
1033  self.summary += [ "## Ignoring the following:" ]
1034  for n in self.ignList:
1035  self.summary += [ " %s" % n ]
1036 
1037  commonContent = [ d for d in chkNames if (d in refNames and d not in self.ignList)]
1038 
1039  if not self.allGood:
1040  self.summary += [ "=" * 80 ]
1041  self.summary += [ "::: comparing common content (mem-size / disk-size)..." ]
1042 
1043  for name in commonContent:
1044  chkMemSize = self.chkFile.poolRecord(name).memSize
1045  refMemSize = self.refFile.poolRecord(name).memSize
1046  chkDiskSize = self.chkFile.poolRecord(name).diskSize
1047  refDiskSize = self.refFile.poolRecord(name).diskSize
1048 
1049  if chkMemSize != refMemSize or (self.strict and chkDiskSize != refDiskSize):
1050  self.summary += [
1051  "[ERR] %12.3f / %12.3f kb (ref) ==> %12.3f / %12.3f kb (chk) | %s" % \
1052  ( refMemSize,refDiskSize,chkMemSize,chkDiskSize, name )
1053  ]
1054  self.allGood = False
1055  elif self.verbose:
1056  self.summary += [
1057  " [OK] %12.3f/%12.3f kb | %s" % \
1058  ( chkMemSize, chkDiskSize, name )
1059  ]
1060 
1061  self.summary += [ "=" * 80 ]
1062 
1063 
1064  if self.allGood: self.summary += [ "## Comparison : [OK]" ]
1065  else: self.summary += [ "## Comparison : [ERR]" ]
1066 
1067  return self.allGood
1068 
1069  def status(self):
1070  if self.allGood: return 0
1071  else: return 1
1072 
1073  def printSummary(self, out = sys.stdout):
1074  for i in self.summary:
1075  out.writelines( i + os.linesep )
1076  pass
1077  return
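
A comparison sketch using DiffFiles (editor's illustration with hypothetical
file names):

    from PyUtils.PoolFile import DiffFiles
    diff = DiffFiles("ref.AOD.pool.root", "chk.AOD.pool.root", verbose=True)
    diff.printSummary()
    exit_code = diff.status()    # 0 when the files match, 1 otherwise
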
1078 
1079 class Counter(object):
1080  """
1081  A counter just contains an item list (pairs class-name/sg-key) and the size
1082  """
1083  size = 0
1084  def __init__(self, name, itemList):
1085  object.__init__(self)
1086  self.name = name
1087  self.itemList = itemList
1088  pass # Counter
1089 
1090 