ATLAS Offline Software
Loading...
Searching...
No Matches
PoolFile.py
Go to the documentation of this file.
1# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2
3# @author: Sebastien Binet <binet@cern.ch>
4# @date: March 2007
5#
6#
7
8__author__ = "Sebastien Binet <binet@cern.ch>"
9
10
11__all__ = [
12 'PoolFileCatalog',
13 'PoolOpts',
14 'isRNTuple',
15 'PoolRecord',
16 'PoolFile',
17 'DiffFiles',
18 ]
19
20
21import sys
22import os
23import shelve
24
25from dbm import whichdb
26
27from .Helpers import ShutUp
28
29
class Units (object):
    """Binary size-unit constants (bytes per kilo/mega-byte)."""
    kb = 1024.0
    Mb = 1024.0 * 1024.0
33
34
def isRNTuple(obj):
    """Return True when *obj* is a ROOT RNTuple (old or new class location)."""
    # MN: remove the "try" after migration to ROOT 6.34
    try:
        from ROOT import RNTuple
    except ImportError:
        # pre-6.34 layout: RNTuple still lives in the Experimental namespace
        from ROOT.Experimental import RNTuple
    return isinstance(obj, RNTuple)
40
41
class PoolFileCatalog (object):
    # NOTE(review): the `class` declaration line was lost in the scraped copy
    # of this file and is restored here (the methods below are its body).
    """ reverse-engineering of the POOL FileCatalog.
    allows to retrieve the physical filename from a logical one, provided
    that the file-id is known to the (real) PoolFileCatalog
    """
    DefaultCatalog = "xmlcatalog_file:PoolFileCatalog.xml"
    AllowedProtocols = (
        # see: PoolSvc::createCatalog
        # http://alxr.usatlas.bnl.gov/lxr/source/atlas/Database/AthenaPOOL/PoolSvc/src/PoolSvc.cxx?v=head#736
        "xmlcatalog_file:", # POOL default
        "apcfile:",         # ATLAS_POOLCOND_PATH
        "prfile:",          # file via PathResolver
        "file:",            # simple file on local FS
        )
56
57 def __init__ (self, catalog=None):
58 super (PoolFileCatalog, self).__init__()
59 self.catalog = None
60
61 if catalog is None:
62 # chase poolfilecatalog location
63 catalog = os.environ.get("POOL_CATALOG", self.DefaultCatalog)
64
65 if isinstance(catalog, str):
66 catalog = [catalog]
67
68 if not isinstance (catalog, (str, list)):
69 raise TypeError(
70 "catalog contact string should be a string or a list thereof! (got %r)"%
71 type(catalog))
72
73 osp = os.path
74 def osp_exp(x):
75 return osp.expanduser(osp.expandvars(x))
76
77 def _handle_apcfile_old(x):
78 """ return $ATLAS_POOLCOND_PATH/poolcond/x
79 """
80 if 'ATLAS_POOLCOND_PATH' not in os.environ:
81 return osp_exp(x)
82 pcp = os.environ["ATLAS_POOLCOND_PATH"]
83 if x.startswith("apcfile:"):
84 x = x[len("apcfile:"):]
85 return osp_exp(osp.join(pcp, 'poolcond', x))
86
87 def _handle_apcfile(x):
88 """ return $ATLAS_POOLCOND_PATH/x
89 """
90 if 'ATLAS_POOLCOND_PATH' not in os.environ:
91 return osp_exp(x)
92 pcp = os.environ["ATLAS_POOLCOND_PATH"]
93 if x.startswith("apcfile:"):
94 x = x[len("apcfile:"):]
95 return osp_exp(osp.join(pcp, x))
96
97 def _handle_xmlcatalog_file(x):
98 return osp_exp(x[len("xmlcatalog_file:"):])
99
100 def _handle_prfile(x):
101 x = x[len("prfile:"):]
102 x = osp_exp(x)
103 try:
104 import AthenaCommon.Utils.unixtools as u
105 return u.FindFile(x,
106 os.environ['DATAPATH'].split(os.pathsep),
107 os.R_OK)
108 except ImportError:
109 return x
110
111 def _handle_file(x):
112 x = x[len("file:"):]
113 x = osp_exp(x)
114 return x
115
116 cat_dispatch = {
117 "xmlcatalog_file:": _handle_xmlcatalog_file,
118 "apcfile:": _handle_apcfile,
119 "prfile:": _handle_prfile,
120 "file:": _handle_file,
121 }
122 assert sorted(cat_dispatch.keys()) == sorted(self.AllowedProtocols), \
123 "catalog dispatch keys does not match AllowedProtocols:" \
124 "\n%s\n%s" % (sorted(cat_dispatch.keys()),
125 sorted(self.AllowedProtocols))
126
127 from . import xmldict
128 def _build_catalog(catalog):
129 if not catalog.startswith(self.AllowedProtocols):
130 raise ValueError(
131 "sorry PoolFile:PoolFileCatalog only supports %s"
132 " as a protocol for the POOL file catalog (got: '%s')"
133 % (self.AllowedProtocols, catalog)
134 )
135 for protocol, handler in cat_dispatch.iteritems():
136 if catalog.startswith(protocol):
137 catalog = handler(catalog)
138 break
139 # make sure the catalog exists...
140 import os
141
142 if not os.path.exists (catalog):
143 return {}
144 # raise RuntimeError(
145 # 'could not find any PoolFileCatalog in [%s]' % catalog
146 # )
147
148
149 root = xmldict.ElementTree.parse (catalog).getroot()
150 return dict(xmldict.xml2dict(root))
151
152 errors = []
153 cat = {'POOLFILECATALOG':{'File':[]}}
154 for c in catalog:
155 try:
156 bc = _build_catalog(c)
157 pc = bc.get('POOLFILECATALOG',{})
158 files = []
159 if pc:
160 files = pc.get('File',[])
161 if isinstance(files, dict):
162 files = [files]
163 cat['POOLFILECATALOG']['File'].extend(files)
164 except Exception as err:
165 errors.append(err)
166
167 if errors:
168 raise errors[0] # FIXME : should we customize this a bit ?
169
170 self.catalog = cat
171 pass
172
173 def pfn (self, url_or_fid):
174 """find the physical file name given a url or a file-id"""
175 import os.path as osp
176 url_or_fid = osp.expanduser(osp.expandvars(url_or_fid))
177 import types
178 if isinstance (url_or_fid, types.ListType):
179 return [self._pfn(f) for f in url_or_fid]
180 else:
181 return self._pfn(url_or_fid)
182
    def _pfn (self, url_or_fid):
        """find the physical file name given a url or a file-id"""
        # nothing to resolve if no catalog content was loaded
        if not ('POOLFILECATALOG' in self.catalog):
            return None
        if not ('File' in self.catalog['POOLFILECATALOG']):
            return None

        PFN_IDX = 0 # take this pfn when alternates exist

        files = self.catalog['POOLFILECATALOG']['File']
        if isinstance(files, dict):
            # in case there where only one entry in the catalog
            files = [files]
        import re
        if url_or_fid.lower().startswith('fid:'):
            url_or_fid = url_or_fid[len('fid:'):]
        # GUID-shaped strings (8-4-4-4-12 word chars) are treated as file-ids
        if re.compile (r'\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$').match (url_or_fid):
            fid = url_or_fid.lower()
            # better to check consistency of catalog over all entries
            # than declare success on first match...
            match = {}
            for f in files:
                if f.ID.lower() == fid:
                    # NOTE(review): match[fid] is reset for every matching
                    # entry, so the len()>1 duplicate check below can never
                    # fire -- looks like a latent bug; confirm intent.
                    match[fid] = []
                    pfn = f.physical.pfn
                    if isinstance(pfn, (list,tuple)):
                        match[fid].append([i.name for i in pfn])
                    else:
                        match[fid].append([pfn.name])
            # NOTE(review): if no entry matched, match[fid] raises a bare
            # KeyError(fid) here rather than the explicit message below.
            if len(match[fid])==1:
                return match[fid][0][PFN_IDX]
            if len(match[fid])>1:
                raise LookupError (
                    "more than one match for FID='%s'!\n%r"%(fid,match)
                    )
            raise KeyError ("no entry with FID='%s' in catalog" % fid)
        else:
            url = url_or_fid
            if url.lower().startswith("lfn:"):
                url = url[len("lfn:"):]
            # better to check consistency of catalog over all entries
            # than declare success on first match...
            match = {}
            for f in files:
                if (f.logical != '' # no LFN for this entry
                    and f.logical.lfn.name == url):
                    match[url] = []
                    pfn = f.physical.pfn
                    if isinstance(pfn, (list,tuple)):
                        match[url].append([i.name for i in pfn])
                    else:
                        match[url].append([pfn.name])
            if len(match[url])==1:
                return match[url][0][PFN_IDX]
            if len(match[url])>1:
                raise LookupError (
                    "more than one match for LFN='%s'!\n%r"%(url,match)
                    )
            raise KeyError ("no entry with LFN='%s' in catalog" % url)
            # assume that if not LFN: then PFN:, no matter what...
            # NOTE(review): the raise above makes this tail unreachable, so
            # bare 'pfn:' urls never take this shortcut -- confirm intent.
            if url.lower().startswith("pfn:"):
                url = url[len("pfn:"):]
            return url
246
247 def __call__ (self, url_or_fid):
248 return self.pfn (url_or_fid)
249
250 pass
251
class PoolOpts(object):
    # NOTE(review): the `class PoolOpts` and nested `class TTreeNames` /
    # `class RNTupleNames` declaration lines were lost in the scraped copy of
    # this file; they are restored here (their attribute sets are used below
    # as cls.TTreeNames.* / cls.RNTupleNames.*).
    # default names of APR file storage elements
    # copied here from RootUtils/APRDefaults.h for performance (as the first dictionary access takes 7 sec)
    # see ATEAM-973 for a more detailed discussion
    # the definitions here should be kept in sync with those!
    class TTreeNames:
        EventData = "CollectionTree"
        EventTag = "POOLCollectionTree"
        DataHeader = "POOLContainer"
        MetaData = "MetaData"
    class RNTupleNames:
        EventData = "EventData"
        EventTag = "EventTag"
        DataHeader = "DataHeader"
        MetaData = "MetaData"

    FAST_MODE = False
    SUPER_DETAILED_BRANCH_SZ = False
    READ_MODE = "READ"
    POOL_HEADER = TTreeNames.DataHeader
    EVENT_DATA = TTreeNames.EventData
    META_DATA = TTreeNames.MetaData
    HDR_FORMAT = " %11s  %11s  %11s  %11s  %5s  %s"
    ROW_FORMAT = "%12.3f kb %12.3f kb %12.3f kb %12.3f %8i  %s"

    @classmethod
    def isData(cls, name):
        """True for payload containers (neither internal '##...' nor a DataHeader)."""
        return not name.startswith("##") and not cls.isDataHeader(name)

    @classmethod
    def isDataHeader(cls, name):
        """True when *name* is one of the known DataHeader container names."""
        return name in {cls.TTreeNames.DataHeader
                        , cls.TTreeNames.DataHeader+"_DataHeader"
                        , cls.RNTupleNames.DataHeader}

    @classmethod
    def isEventData(cls, name):
        return name.startswith(PoolOpts.EVENT_DATA)

    @classmethod
    def isAugmentation(cls, name):
        return "_DAOD_" in name

    @classmethod
    def augmentationName(cls, name):
        """Extract the augmentation-stream token ('' for the main stream)."""
        # third '_'-separated token; padding with '__' guards short names
        s = (name+"__").split('_')[2]
        if s.endswith("Form"):
            s = s[:-4]
        return s

    @classmethod
    def isAugmentedHeader(cls, name):
        return name.startswith(PoolOpts.POOL_HEADER) and cls.isAugmentation(name)

    pass # class PoolOpts
307
def _get_total_size (branch):
    """Return the in-memory size of *branch* in bytes (-1. in FAST_MODE)."""
    if PoolOpts.FAST_MODE:
        return -1.
    if not PoolOpts.SUPER_DETAILED_BRANCH_SZ:
        return branch.GetTotalSize()
    # super-detailed mode: sum the object length of every written basket
    branch.LoadBaskets()
    total = 0
    for ibasket in range(branch.GetWriteBasket()):
        # 8-byte basket bookkeeping overhead is not counted
        total += branch.GetBasket(ibasket).GetObjlen() - 8
    return total
319
def file_name(fname):
    """take a file name, return the pair (protocol, 'real' file name)
    """
    fname = os.path.expanduser(os.path.expandvars(fname))

    def _as_uri(uri):
        # a bare absolute path is implicitly a local file
        return 'file:' + uri if uri.startswith('/') else uri

    from urllib.parse import urlsplit
    url = urlsplit(_as_uri(fname))
    protocol = url.scheme

    def _clean_path(p):
        from posixpath import normpath
        p = normpath(p)
        # normpath keeps a leading double slash; collapse it to one
        return p[1:] if p.startswith('//') else p

    if protocol in ('', 'file', 'pfn'):
        protocol = ''
        fname = _clean_path(url.path)
        if fname.startswith('/castor/'):
            # CASTOR paths implicitly go through rfio
            protocol = 'rfio'
            fname = 'rfio:' + fname

    elif protocol in ('rfio', 'castor'):
        protocol = 'rfio'
        fname = 'rfio:' + _clean_path(url.path)

    elif protocol in ('root', 'dcap', 'dcache', 'http', 'https', 'dav', 'davs'):
        # handled natively by ROOT: keep the uri untouched
        pass

    elif protocol in ('gsidcap',):
        protocol = 'gfal:gsidcap'

    elif protocol in ('lfn', 'fid',):
        # percolate through the PoolFileCatalog
        from PyUtils.PoolFile import PoolFileCatalog as pfc
        fname = pfc().pfn(protocol + ':' + url.path)

    elif protocol in ('ami',):
        # !! keep order of tokens !
        for token in ('ami:', '//', '/'):
            if fname.startswith(token):
                fname = fname[len(token):]
        fname = 'ami://' + fname

    else:
        print(f'## warning: unknown protocol [{protocol}]. we will just return our input')

    return (protocol, fname)
379
380def _setup_ssl(root):
381 x509_proxy = os.environ.get('X509_USER_PROXY', '')
382 if x509_proxy:
383 # setup proper credentials
384 root.TSSLSocket.SetUpSSL(
385 x509_proxy,
386 "/etc/grid-security/certificates",
387 x509_proxy,
388 x509_proxy)
389 else:
390 print("## warning: protocol https is requested but no X509_USER_PROXY was found! (opening the file might fail.)")
391 pass
392 return
393
def _root_open(fname):
    """Open *fname* with ROOT and return the TFile.

    Transparently switches to TWebFile (with SSL credentials) for the
    secure-http protocol; raises IOError when the file cannot be opened.
    """
    import PyUtils.RootUtils as ru
    root = ru.import_root()
    import re

    # silence the spurious 'no dictionary for class' warnings
    with ShutUp(filters=[
        re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for class.*') ]):
        root.gSystem.Load('libRootCollection')
        root_open = root.TFile.Open

        # we need to get back the protocol b/c of the special
        # case of secure-http which needs to open TFiles as TWebFiles...
        protocol, _ = file_name(fname)
        if protocol == 'https':
            _setup_ssl(root)
            root_open = root.TWebFile.Open

        f = root_open(fname, 'READ')
        if f is None or not f:
            import errno
            raise IOError(errno.ENOENT,
                          'No such file or directory',fname)
        # FIX: dropped the unreachable `return` that followed `return f`
        return f
418
def retrieveBranchInfos( branch, poolRecord, ident = "" ):
    """Recursively accumulate the sizes of *branch*'s sub-branches into
    *poolRecord* (all sizes in kb) and return the updated record.

    ident: indentation prefix, kept for backward compatibility with the
           removed `if 0:` debug printout.
    """
    # FIX: dropped the dead `if 0:` debug-print block and its unused fmt
    branches = branch.GetListOfBranches()
    for b in branches:
        poolRecord.memSize += _get_total_size (b) / Units.kb
        # a (near-)zero zipped size means the basket was not compressed
        if (b.GetZipBytes() < 0.001):
            poolRecord.memSizeNoZip += _get_total_size (b) / Units.kb
        poolRecord.diskSize += b.GetZipBytes() / Units.kb
        poolRecord = retrieveBranchInfos ( b, poolRecord, ident+"  " )

    return poolRecord
439
def make_pool_record (branch, dirType):
    """Build a PoolRecord for *branch* (sizes converted to kb).

    dirType: forwarded to PoolRecord ('T' tree, 'B' branch, 'N' rntuple,
             'F' field).
    """
    memSize = _get_total_size (branch) / Units.kb
    zipBytes = branch.GetZipBytes()
    memSizeNoZip = memSize if zipBytes < 0.001 else 0.
    # FIX: reuse the zipBytes value fetched above instead of calling
    # branch.GetZipBytes() a second time
    diskSize = zipBytes / Units.kb
    typeName = branch.GetClassName()
    # plain (non-object) branches carry their type on the first leaf
    if not typeName and (leaf := branch.GetListOfLeaves().At(0)):
        typeName = leaf.GetTypeName()
    return PoolRecord(branch.GetName(), memSize, diskSize, memSizeNoZip,
                      branch.GetEntries(),
                      dirType=dirType,
                      typeName=typeName)
452
class PoolRecord(object):
    # NOTE(review): the `class PoolRecord` and `def allowedValues():` lines
    # were lost in the scraped copy of this file and are restored here
    # (callers use PoolRecord.Sorter.allowedValues() and the Sorter constants).
    """Plain record holding the size statistics of one POOL container."""
    class Sorter:
        # attribute names usable as a sort key for PoolFile.checkFile
        DiskSize = "diskSize"
        MemSize = "memSize"
        ContainerName = "name"

        @staticmethod
        def allowedValues():
            return [ PoolRecord.Sorter.DiskSize,
                     PoolRecord.Sorter.MemSize,
                     PoolRecord.Sorter.ContainerName ]
        pass
    def __init__(self, name, memSize, diskSize, memSizeNoZip, nEntries, dirType,
                 detailedInfos = "", typeName = None):
        """Initialize PoolRecord instance.

        dirType first letter of object type name that may distinguish the types:
                "T" for TTree, "B" for TBranch,
                "N" for RNTuple, "F" for RField
        """
        object.__init__(self)
        self.name = name
        self.memSize = memSize
        self.diskSize = diskSize
        self.memSizeNoZip = memSizeNoZip
        self.nEntries = nEntries
        self.dirType = dirType
        self.details = detailedInfos
        self.augName = ''
        self.typeName = typeName
        return
486
class PoolFile(object):
    # NOTE(review): the `class PoolFile` declaration line was lost in the
    # scraped copy of this file and is restored here (the methods below are
    # its body).
    """
    A simple class to retrieve informations about the content of a POOL file.
    It should be abstracted from the underlying technology used to create this
    POOL file (Db, ROOT,...).
    Right now, we are using the easy and loosy solution: going straight to the
    ROOT 'API'.
    """
495
496 def __init__(self, fileName, verbose=True):
497 object.__init__(self)
498
499 self._fileInfos = None
500 self.keys = None
501 self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
502 nEntries = 0,
503 dirType = "T")
504 self.augNames = set()
505 self.dataHeaderA = {}
506 self.data = []
507 self.verbose = verbose
508
509 # get the "final" file name (handles all kind of protocols)
510 try:
511 protocol, fileName = file_name(fileName)
512 except Exception as err:
513 print("## warning: problem opening PoolFileCatalog:\n%s"%err)
514 import traceback
515 traceback.print_exc(err)
516 pass
517
518 self.poolFile = None
519 dbFileName = whichdb( fileName )
520 if dbFileName not in ( None, '' ):
521 if self.verbose is True:
522 print("## opening file [%s]..." % str(fileName))
523 db = shelve.open( fileName, 'r' )
524 if self.verbose is True:
525 print("## opening file [OK]")
526 report = db['report']
527 self._fileInfos = report['fileInfos']
528 self.dataHeader = report['dataHeader']
529 self.data = report['data']
530 else:
531 if self.verbose is True:
532 print("## opening file [%s]..." % str(fileName))
533 self.__openPoolFile( fileName )
534 if self.verbose is True:
535 print("## opening file [OK]")
536 self.__processFile()
537
538 return
539
    def __openPoolFile(self, fileName):
        """Open `fileName` via ROOT (quiet, no graphics libraries), store the
        TFile in self.poolFile and fill self._fileInfos; raises IOError on
        failure.
        """
        # hack to prevent ROOT from loading graphic libraries and hence bother
        # our fellow Mac users
        if self.verbose is True:
            print("## importing ROOT...")
        import PyUtils.RootUtils as ru
        ROOT = ru.import_root()
        self.ROOT = ROOT
        if self.verbose is True:
            print("## importing ROOT... [DONE]")
        # prevent ROOT from being too verbose
        rootMsg = ShutUp()
        rootMsg.mute()
        ROOT.gErrorIgnoreLevel = ROOT.kFatal

        poolFile = None
        try:
            poolFile = ROOT.TFile.Open( fileName, PoolOpts.READ_MODE )
        except Exception as e:
            # restore ROOT's output before reporting the failure
            rootMsg.unMute()
            print("## Failed to open file [%s] !!" % fileName)
            print("## Reason:")
            print(e)
            print("## Bailing out...")
            raise IOError("Could not open file [%s]" % fileName)

        rootMsg.unMute()

        # TFile.Open returns None (rather than raising) on most failures
        if poolFile is None:
            print("## Failed to open file [%s] !!" % fileName)
            msg = "Could not open file [%s]" % fileName
            raise IOError(msg)

        self.poolFile = poolFile
        assert self.poolFile.IsOpen() and not self.poolFile.IsZombie(), \
               "Invalid POOL file or a Zombie one"
        self._fileInfos = {
            'name' : self.poolFile.GetName(),
            'size' : self.poolFile.GetSize(),
            }
        return
581
    def __processFile(self):
        """Walk the keys of the opened file, build PoolRecord entries for the
        DataHeader(s) and for every payload container (TTree branches or
        RNTuple fields), filling self.keys, self.data, self.dataHeader(A)
        and self.augNames.
        """
        # first pass: find the event count from whichever DataHeader exists
        for name in {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}:
            dhKey = self.poolFile.FindKey( name )
            if dhKey:
                obj = self.poolFile.Get( name )
                if isinstance(obj, self.ROOT.TTree):
                    nEntries = obj.GetEntries()
                elif isRNTuple(obj):
                    try:
                        nEntries = self.ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
                    except AttributeError:
                        # ROOT 6.36 and later
                        nEntries = self.ROOT.RNTupleReader.Open(obj).GetNEntries()
                else:
                    raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
                break
        else:
            # for/else: no DataHeader key found at all
            nEntries = 0

        # second pass: collect one key per distinct container name and
        # register per-augmentation DataHeader records
        keys = []
        containers = []
        for k in self.poolFile.GetListOfKeys():
            keyname = k.GetName()
            obj = self.poolFile.Get( keyname )
            if isinstance(obj, self.ROOT.TTree):
                containerName = obj.GetName()
                nEntries = obj.GetEntries()
                dirType = "T"
            elif isRNTuple(obj):
                try:
                    reader = self.ROOT.Experimental.RNTupleReader.Open(obj)
                except AttributeError:
                    # ROOT 6.36 and later
                    reader = self.ROOT.RNTupleReader.Open(obj)
                containerName = reader.GetDescriptor().GetName()
                nEntries = reader.GetNEntries()
                dirType = "N"
            else:
                raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
            # keep only the first key (highest cycle) of each container
            if containerName not in containers:
                keys.append(k)
                containers.append(containerName)
                pass
            if keyname.startswith(PoolOpts.POOL_HEADER) and not keyname.endswith('Form'):
                self.dataHeaderA[PoolOpts.augmentationName(keyname)] = \
                    PoolRecord("DataHeader", 0, 0, 0,
                               nEntries = nEntries,
                               dirType = dirType)

        keys.sort (key = lambda x: x.GetName())
        self.keys = keys
        del containers

        # third pass: size up every retained container
        for k in keys:
            obj = self.poolFile.Get( k.GetName() )
            if isinstance(obj, self.ROOT.TTree):
                name = obj.GetName()
            elif isRNTuple(obj):
                try:
                    inspector = self.ROOT.Experimental.RNTupleInspector.Create(obj)
                except AttributeError:
                    # ROOT 6.36 and later
                    inspector = self.ROOT.RNTupleInspector.Create(obj)
                name = inspector.GetDescriptor().GetName()

            if PoolOpts.isDataHeader(name):
                contName = "DataHeader"
                if isinstance(obj, self.ROOT.TTree):
                    memSize = obj.GetTotBytes() / Units.kb
                    diskSize = obj.GetZipBytes() / Units.kb
                    memSizeNoZip = 0.0
                    if diskSize < 0.001:
                        memSizeNoZip = memSize
                    nEntries = obj.GetEntries()

                    # drill into the single DataHeader_p* payload branch, if any
                    dhBranchNames = [
                        br.GetName() for br in obj.GetListOfBranches()
                        if br.GetName().count("DataHeader_p") > 0
                        ]
                    if len(dhBranchNames) == 1:
                        dhBranch = obj.GetBranch(dhBranchNames[0])
                        typeName = dhBranch.GetClassName()
                        # plain branches carry their type on the first leaf
                        if not typeName and (leaf := dhBranch.GetListOfLeaves().At(0)):
                            typeName = leaf.GetTypeName()
                        poolRecord = retrieveBranchInfos(
                            dhBranch,
                            PoolRecord( contName, 0., 0., 0.,
                                        nEntries,
                                        dirType = "T",
                                        typeName = typeName ),
                            ident = " "
                            )
                    else:
                        poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
                                                nEntries,
                                                dirType = "T")

                    self.dataHeader = poolRecord
                elif isRNTuple(obj):
                    diskSize = inspector.GetCompressedSize() / Units.kb
                    memSize = inspector.GetUncompressedSize() / Units.kb

                    memSizeNoZip = 0.0
                    if diskSize < 0.001:
                        memSizeNoZip = memSize
                    nEntries = inspector.GetDescriptor().GetNEntries()
                    poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
                                            nEntries,
                                            dirType = "N")
                    self.dataHeader = poolRecord
            elif PoolOpts.isData(name):
                if isinstance(obj, self.ROOT.TTree):
                    if not hasattr(obj, 'GetListOfBranches'):
                        continue
                    branches = obj.GetListOfBranches()
                    dirType = "T"
                    if name in (PoolOpts.EVENT_DATA, PoolOpts.META_DATA):
                        # event/meta data trees are accounted branch by branch
                        dirType = "B"
                    for branch in branches:
                        poolRecord = retrieveBranchInfos(
                            branch,
                            make_pool_record(branch, dirType),
                            ident = " "
                            )
                        poolRecord.augName = PoolOpts.augmentationName(name)
                        self.augNames.add(poolRecord.augName)
                        self.data += [ poolRecord ]
                elif isRNTuple(obj):
                    descriptor = inspector.GetDescriptor()
                    dirType = "N"
                    if name in {PoolOpts.RNTupleNames.EventData, PoolOpts.RNTupleNames.MetaData}:
                        # event/meta data ntuples are accounted field by field
                        dirType = "F"
                    fieldZeroId = descriptor.GetFieldZeroId()
                    for fieldDescriptor in descriptor.GetFieldIterable(fieldZeroId):
                        fieldId = fieldDescriptor.GetId()
                        fieldTreeInspector = inspector.GetFieldTreeInspector(fieldId)
                        diskSize = fieldTreeInspector.GetCompressedSize() / Units.kb
                        memSize = fieldTreeInspector.GetUncompressedSize() / Units.kb
                        typeName = fieldDescriptor.GetTypeName()
                        fieldName = fieldDescriptor.GetFieldName()
                        poolRecord = PoolRecord(fieldName, memSize, diskSize, memSize,
                                                descriptor.GetNEntries(),
                                                dirType=dirType,
                                                typeName=typeName)
                        poolRecord.augName = PoolOpts.augmentationName(name)
                        self.augNames.add(poolRecord.augName)
                        self.data += [ poolRecord ]
        # loop over keys

        return
733
734 def fileInfos(self):
735 return os.linesep.join( [
736 "File:" + self._fileInfos['name'],
737 "Size: %12.3f kb" % (self._fileInfos['size'] / Units.kb),
738 "Nbr Events: %i" % self.dataHeader.nEntries
739 ] )
740
741
    def checkFile(self, sorting = PoolRecord.Sorter.DiskSize):
        """Accumulate size totals over all records and, when verbose, print a
        formatted table (one row per container, plus per-augmentation-stream
        and grand totals).

        sorting: a PoolRecord.Sorter attribute name used to sort the rows.
        """
        if self.verbose is True:
            print(self.fileInfos())
            if len(self.augNames) > 1:
                # per-augmentation-stream event counts
                for aug in self.augNames:
                    if len(aug) > 0:
                        print( "Nbr %s Events: %i" % (aug, self.dataHeaderA[aug].nEntries) )

        # NB: sorts self.data in place
        data = self.data
        if sorting in PoolRecord.Sorter.allowedValues():
            import operator
            data.sort(key = operator.attrgetter(sorting) )

        def _get_val(x, dflt=-999.):
            # in FAST_MODE the memory sizes were never measured
            if PoolOpts.FAST_MODE:
                return dflt
            return x

        totMemSize = _get_val(self.dataHeader.memSize, dflt=0.)
        totDiskSize = self.dataHeader.diskSize

        def _safe_div(num,den):
            if float(den) == 0.:
                return 0.
            return num/den

        if self.verbose is True:
            print("")
            print("="*80)
            print(PoolOpts.HDR_FORMAT % ( "Mem Size", "Disk Size","Size/Evt",
                                          "MissZip/Mem","items",
                                          "(X) Container Name (X=Tree|Branch)" ))
            print("="*80)

            # the DataHeader row comes first, outside the data loop
            print(PoolOpts.ROW_FORMAT % (
                _get_val (self.dataHeader.memSize),
                self.dataHeader.diskSize,
                _safe_div(self.dataHeader.diskSize,float(self.dataHeader.nEntries)),
                _get_val (_safe_div(self.dataHeader.memSizeNoZip,
                                    self.dataHeader.memSize)),
                self.dataHeader.nEntries,
                "("+self.dataHeader.dirType+") "+self.dataHeader.name
                ))
            print("-"*80)

        # running totals, also split per augmentation stream
        totMemSizeA = {}
        totDiskSizeA = {}
        for d in data:
            totMemSize += 0. if PoolOpts.FAST_MODE else d.memSize
            totDiskSize += d.diskSize
            memSizeNoZip = d.memSizeNoZip/d.memSize if d.memSize != 0. else 0.
            aug = d.augName
            totMemSizeA[aug] = totMemSizeA.get(aug,0.) + d.memSize
            totDiskSizeA[aug] = totDiskSizeA.get(aug,0.) + d.diskSize
            if self.verbose is True:
                print(PoolOpts.ROW_FORMAT % (
                    _get_val (d.memSize),
                    d.diskSize,
                    _safe_div(d.diskSize, float(self.dataHeader.nEntries)),
                    _get_val (memSizeNoZip),
                    d.nEntries,
                    "("+d.dirType+") "+d.name
                    ))

        if self.verbose is True:
            print("="*80)
            if len(self.augNames) > 1:
                # one summary row per augmentation stream ('' = MAIN)
                augs = sorted(self.augNames)
                for a in augs:
                    print(PoolOpts.ROW_FORMAT % (
                        totMemSizeA[a], totDiskSizeA[a],
                        _safe_div(totDiskSizeA[a], float(self.dataHeaderA[a].nEntries)),
                        0.0,
                        self.dataHeaderA[a].nEntries,
                        "Aug Stream: " + ('MAIN' if a=='' else a)
                        ))
                print("-"*80)
            print(PoolOpts.ROW_FORMAT % (
                totMemSize, totDiskSize,
                _safe_div(totDiskSize, float(self.dataHeader.nEntries)),
                0.0, self.dataHeader.nEntries,
                "TOTAL (POOL containers)"
                ))
            print("="*80)
        if PoolOpts.FAST_MODE:
            print("::: warning: FAST_MODE was enabled: some columns' content ",)
            print("is meaningless...")
        return
831
    def detailedDump(self, bufferName = None ):
        """Print each container (tree.Print()) into *bufferName* (default
        /dev/stdout), temporarily redirecting the process stdout fd there.

        Requires a genuinely opened POOL file (not a shelve report).
        """
        if self.poolFile is None or \
           self.keys is None:
            print("Can't perform a detailedDump with a shelve file as input !")
            return

        if bufferName is None:
            bufferName = "/dev/stdout"
        out = open( bufferName, "w" )
        sys.stdout.flush()
        # redirect the C-level stdout fd so ROOT's own printing lands in `out`
        save_stdout_fileno = os.dup (sys.stdout.fileno())
        os.dup2( out.fileno(), sys.stdout.fileno() )

        out.write( "#" * 80 + os.linesep )
        out.write( "## detailed dump" + os.linesep )
        out.flush()

        for key in self.keys:
            tree = key.ReadObj()
            name = tree.GetName()

            if PoolOpts.isDataHeader(name) or \
               PoolOpts.isData(name):
                try:
                    # header goes to stderr so it is not swallowed by the redirect
                    print ("=== [%s] ===" % name, file=sys.stderr)
                    tree.Print()
                except Exception as err:
                    print ("Caught:",err, file=sys.stderr)
                    print (sys.exc_info()[0], file=sys.stderr)
                    print (sys.exc_info()[1], file=sys.stderr)
                    pass
                pass
            pass
        out.write( "#" * 80 + os.linesep )
        out.flush()
        out.write( "#" * 80 + os.linesep )

        # NOTE(review): a few lines appear to have been lost from this copy of
        # the file right above here (original lines 869-871) -- restore from VCS.
        out.flush()
        if bufferName != "<stdout>":
            out.close()
            sys.stdout.close()
            # re-attach python's stdout to the saved fd
            sys.stdout = open (save_stdout_fileno, 'a')
        return
878
879 def poolRecord(self, name):
880 """
881 Return a PoolRecord according to its (branch) name
882 Raise KeyError if no match is found
883 """
884 for data in self.data:
885 if data.name == name:
886 return data
887 raise KeyError("No PoolRecord with name [%s]" % name)
888
889 def saveReport (self, fileName):
890 """
891 Save all the gathered informations into a python shelve or a CSV file
892 (depending on the @param `fileName` extension)
893 """
894 import os
895 if os.path.splitext(fileName)[-1] == '.csv':
896 return self._save_csv_report (fileName)
897 return self._save_shelve_report (fileName)
898
899 def _save_shelve_report(self, fileName):
900 """
901 Save all the gathered informations into a python shelve
902 Data can then be read like so:
903 >>> import shelve
904 >>> db = shelve.open( 'myfile.dat', 'r' )
905 >>> report = db['report']
906 >>> print ('fileSize:',report['fileSize'])
907 >>> print ('dataHeader/memSize:',report['dataHeader'].memSize)
908 >>> for d in report['data']:
909 ... print ('data:',d.name,d.nEntries,d.memSize)
910 """
911 import shelve, os
912 if os.path.exists (fileName):
913 os.unlink (fileName)
914 db = shelve.open (fileName)
915 db['report'] = {
916 'fileInfos' : self._fileInfos,
917 'nbrEvts' : self.dataHeader.nEntries,
918 'dataHeader' : self.dataHeader,
919 'data' : self.data
920 }
921 db.close()
922 return
923
924 def _save_csv_report(self, fileName):
925 """
926 Save all the gathered informations into a CSV file
927 """
928 import csv, os
929 if os.path.exists (fileName):
930 os.unlink (fileName)
931 args = {'newline' : ''}
932 f = open (fileName, 'w', **args)
933 o = csv.writer (f)
934 o.writerow (['file name', self._fileInfos['name']])
935 o.writerow (['file size', self._fileInfos['size']])
936 o.writerow (['nbr evts', self.dataHeader.nEntries])
937 o.writerow (['mem size', 'disk size', 'mem size nozip', 'items',
938 'container name', 'branch type'])
939
940 for d in self.data:
941 o.writerow ([d.memSize, d.diskSize, d.memSizeNoZip,
942 d.nEntries, d.name, d.dirType])
943 f.close()
944 return
945
946 def __del__(self):
947 if self.poolFile and hasattr(self.poolFile, 'Close'):
948 try:
949 self.poolFile.Close()
950 self.poolFile = None
951 except Exception as err:
952 print("WARNING:",err)
953 pass
954
955 pass # class PoolFile
956
class DiffFiles(object):
    # NOTE(review): the `class DiffFiles` declaration line was lost in the
    # scraped copy of this file and is restored here (the methods below are
    # its body).
    """
    A helper class to compare 2 POOL files and check that they match, both in
    terms of containers' content and containers' sizes
    """
962
    def __init__(self, refFileName, chkFileName, verbose = False, ignoreList = None, strict = False):
        """Open the reference and check files and run the comparison.

        refFileName: reference POOL file (or shelve report)
        chkFileName: file to check against the reference
        verbose:     also report matching containers in the summary
        ignoreList:  container names to exclude from the comparison
        strict:      additionally require identical disk sizes
        """
        object.__init__(self)

        self.verbose = verbose
        self.strict = strict
        refFileName = os.path.expandvars( os.path.expanduser( refFileName ) )
        chkFileName = os.path.expandvars( os.path.expanduser( chkFileName ) )

        if ignoreList is None:
            ignoreList = []

        try:
            self.refFile = PoolFile( refFileName )
            self.chkFile = PoolFile( chkFileName )
            self.ignList = sorted( ignoreList )
        except Exception as err:
            # report the original failure, then re-raise with both file names
            print("## Caught exception [%s] !!" % str(err.__class__))
            print("## What:",err)
            print(sys.exc_info()[0])
            print(sys.exc_info()[1])
            err = "Error while opening POOL files !"
            err += " chk : %s%s" % ( chkFileName, os.linesep )
            err += " ref : %s%s" % ( refFileName, os.linesep )
            raise Exception(err)

        self.allGood = True
        self.summary = []

        # run the comparison right away; results land in allGood/summary
        self.__checkDiff()
        return
993
    def __checkDiff(self):
        """Compare the two files' container lists and sizes, appending a
        human-readable report to self.summary; sets self.allGood to False on
        any mismatch and returns it.
        """
        self.summary += [
            "=" * 80,
            "::: Comparing POOL files...",
            " ref : %s" % self.refFile._fileInfos['name'],
            " chk : %s" % self.chkFile._fileInfos['name'],
            "-" * 80,
            ]

        # differing event counts are reported but do NOT fail the comparison
        if self.chkFile.dataHeader.nEntries != \
           self.refFile.dataHeader.nEntries :
            self.summary += [
                "## WARNING: files don't have the same number of entries !!",
                " ref : %r" % self.refFile.dataHeader.nEntries,
                " chk : %r" % self.chkFile.dataHeader.nEntries,
                ]

        refNames = sorted( [d.name for d in self.refFile.data] )
        chkNames = sorted( [d.name for d in self.chkFile.data] )

        if chkNames != refNames:
            self.summary += [
                "## ERROR: files don't have the same content !!",
                ]
            # containers present only in the checked file
            addNames = [ n for n in chkNames if n not in refNames ]
            if len( addNames ) > 0:
                self.summary += [ "## collections in 'chk' and not in 'ref'" ]
                for n in addNames:
                    self.summary += [ " + %s" % n ]
            # containers present only in the reference file
            subNames = [ n for n in refNames if n not in chkNames ]
            if len( subNames ) > 0:
                self.summary += [ "## collections in 'ref' and not in 'chk'" ]
                for n in subNames:
                    self.summary += [ " - %s" % n ]
            self.allGood = False
            pass

        if len(self.ignList) > 0:
            self.summary += [ "## Ignoring the following:" ]
            for n in self.ignList:
                self.summary += [ " %s" % n ]

        # only containers present in both files (and not ignored) are sized up
        commonContent = [ d for d in chkNames if (d in refNames and d not in self.ignList)]

        if not self.allGood:
            self.summary += [ "=" * 80 ]
        self.summary += [ "::: comparing common content (mem-size / disk-size)..." ]

        for name in commonContent:
            chkMemSize = self.chkFile.poolRecord(name).memSize
            refMemSize = self.refFile.poolRecord(name).memSize
            chkDiskSize = self.chkFile.poolRecord(name).diskSize
            refDiskSize = self.refFile.poolRecord(name).diskSize

            # disk sizes only matter in strict mode
            if chkMemSize != refMemSize or (self.strict and chkDiskSize != refDiskSize):
                self.summary += [
                    "[ERR] %12.3f / %12.3f kb (ref) ==> %12.3f / %12.3f kb (chk) | %s" % \
                    ( refMemSize,refDiskSize,chkMemSize,chkDiskSize, name )
                    ]
                self.allGood = False
            elif self.verbose:
                self.summary += [
                    " [OK] %12.3f/%12.3f kb | %s" % \
                    ( chkMemSize, chkDiskSize, name )
                    ]

        self.summary += [ "=" * 80 ]

        ## final decision
        if self.allGood: self.summary += [ "## Comparison : [OK]" ]
        else:            self.summary += [ "## Comparison : [ERR]" ]

        return self.allGood
1068
1069 def status(self):
1070 if self.allGood: return 0
1071 else: return 1
1072
1073 def printSummary(self, out = sys.stdout):
1074 for i in self.summary:
1075 out.writelines( i + os.linesep )
1076 pass
1077 return
1078
class Counter(object):
    # NOTE(review): the `class Counter` declaration line was lost in the
    # scraped copy of this file and is restored here.
    """
    A counter just contains an item list (pairs class-name/sg-key) and the size
    """
    # class-level running size, shared default
    size = 0
    def __init__(self, name, itemList):
        object.__init__(self)
        self.name = name
        self.itemList = itemList
    pass # Counter
1089
1090
void print(char *figname, TCanvas *c1)
__init__(self, name, itemList)
Definition PoolFile.py:1084
printSummary(self, out=sys.stdout)
Definition PoolFile.py:1073
bool allGood
final decision
Definition PoolFile.py:988
__init__(self, refFileName, chkFileName, verbose=False, ignoreList=None, strict=False)
Definition PoolFile.py:963
__call__(self, url_or_fid)
Definition PoolFile.py:247
__init__(self, catalog=None)
Definition PoolFile.py:57
__openPoolFile(self, fileName)
Definition PoolFile.py:540
detailedDump(self, bufferName=None)
Definition PoolFile.py:832
__init__(self, fileName, verbose=True)
Definition PoolFile.py:496
isAugmentation(cls, name)
Definition PoolFile.py:292
augmentationName(cls, name)
Definition PoolFile.py:296
isAugmentedHeader(cls, name)
Definition PoolFile.py:303
__init__(self, name, memSize, diskSize, memSizeNoZip, nEntries, dirType, detailedInfos="", typeName=None)
Definition PoolFile.py:468
— data ---------------------------------------------------------------—
Definition PoolFile.py:30
STL class.
T * Get(TFile &f, const std::string &n, const std::string &dir="", const chainmap_t *chainmap=0, std::vector< std::string > *saved=0)
get a histogram given a path, and an optional initial directory if histogram is not found,...
bool add(const std::string &hname, TKey *tobj)
Definition fastadd.cxx:55
int count(std::string s, const std::string &regx)
count how many occurances of a regx are in a string
Definition hcg.cxx:146
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
make_pool_record(branch, dirType)
Definition PoolFile.py:440
file_name(fname)
Definition PoolFile.py:320
retrieveBranchInfos(branch, poolRecord, ident="")
Definition PoolFile.py:419
_root_open(fname)
Definition PoolFile.py:394
_save_shelve_report(self, fileName)
Definition PoolFile.py:899
_get_total_size(branch)
Definition PoolFile.py:308
poolRecord(self, name)
Definition PoolFile.py:879
_save_csv_report(self, fileName)
Definition PoolFile.py:924
saveReport(self, fileName)
Definition PoolFile.py:889
void handler(int sig)
signal handler
Definition rmain.cxx:99