ATLAS Offline Software
Loading...
Searching...
No Matches
python.PoolFile.PoolFile Class Reference
Inheritance diagram for python.PoolFile.PoolFile:
Collaboration diagram for python.PoolFile.PoolFile:

Public Member Functions

 __init__ (self, fileName, verbose=True)
 fileInfos (self)
 checkFile (self, sorting=PoolRecord.Sorter.DiskSize)
 detailedDump (self, bufferName=None)

Public Attributes

 keys = None
 dataHeader
 augNames = set()
dict dataHeaderA = {}
list data = []
 verbose = verbose
 poolFile = None
 ROOT = ROOT

Protected Attributes

dict _fileInfos = None

Private Member Functions

 __openPoolFile (self, fileName)
 __processFile (self)

Detailed Description

A simple class to retrieve information about the content of a POOL file.
It should be abstracted from the underlying technology used to create this
POOL file (Db, ROOT, ...).
Right now, we are using the easy (if lousy) solution: going straight to the
ROOT 'API'.

Definition at line 453 of file PoolFile.py.

Constructor & Destructor Documentation

◆ __init__()

python.PoolFile.PoolFile.__init__ ( self,
fileName,
verbose = True )

Definition at line 462 of file PoolFile.py.

def __init__(self, fileName, verbose=True):
    """Open ``fileName`` and collect its per-container size records.

    Two kinds of input are handled:

    * a shelve database (as detected by ``whichdb``) holding a ``'report'``
      entry, from which ``fileInfos``, ``dataHeader`` and ``data`` are
      loaded directly;
    * anything else, which is opened through ROOT and scanned.

    Args:
        fileName: name of the file to inspect. It is first passed through
            ``file_name`` to resolve the final name; if that fails the name
            is used as given.
        verbose: progress messages are printed only when this is exactly
            ``True``.
    """
    object.__init__(self)

    self._fileInfos = None
    self.keys = None
    self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
                                 nEntries = 0,
                                 dirType = "T")
    self.augNames = set()
    self.dataHeaderA = {}
    self.data = []
    self.verbose = verbose

    # get the "final" file name (handles all kind of protocols)
    try:
        _, fileName = file_name(fileName)
    except Exception as err:
        # Deliberate best effort: report the problem and carry on with the
        # file name exactly as it was given.
        print("## warning: problem opening PoolFileCatalog:\n%s"%err)
        import traceback
        # print_exc() reports the exception currently being handled; its
        # first parameter is `limit`, so the exception must not be passed.
        traceback.print_exc()

    self.poolFile = None
    dbFileName = whichdb( fileName )
    if dbFileName not in ( None, '' ):
        if self.verbose is True:
            print("## opening file [%s]..." % str(fileName))
        db = shelve.open( fileName, 'r' )
        try:
            if self.verbose is True:
                print("## opening file [OK]")
            report = db['report']
            self._fileInfos = report['fileInfos']
            self.dataHeader = report['dataHeader']
            self.data = report['data']
        finally:
            # the report has been read into memory; release the database
            db.close()
    else:
        if self.verbose is True:
            print("## opening file [%s]..." % str(fileName))
        self.__openPoolFile( fileName )
        if self.verbose is True:
            print("## opening file [OK]")
        self.__processFile()

    return
505
void print(char *figname, TCanvas *c1)
STL class.

Member Function Documentation

◆ __openPoolFile()

python.PoolFile.PoolFile.__openPoolFile ( self,
fileName )
private

Definition at line 506 of file PoolFile.py.

def __openPoolFile(self, fileName):
    """Open ``fileName`` with ROOT and record its name and size.

    On success ``self.ROOT``, ``self.poolFile`` and ``self._fileInfos``
    (keys ``'name'`` and ``'size'``) are set.

    Args:
        fileName: name of the file handed to ``ROOT.TFile.Open``.

    Raises:
        IOError: if ROOT raises while opening the file, or hands back no
            usable file object.
        AssertionError: if the returned file is not open or is a zombie.
    """
    # hack to prevent ROOT from loading graphic libraries and hence bother
    # our fellow Mac users
    if self.verbose is True:
        print("## importing ROOT...")
    import PyUtils.RootUtils as ru
    ROOT = ru.import_root()
    self.ROOT = ROOT
    if self.verbose is True:
        print("## importing ROOT... [DONE]")
    # prevent ROOT from being too verbose
    rootMsg = ShutUp()
    rootMsg.mute()
    ROOT.gErrorIgnoreLevel = ROOT.kFatal

    poolFile = None
    try:
        poolFile = ROOT.TFile.Open( fileName, PoolOpts.READ_MODE )
    except Exception as e:
        # un-mute first so the diagnostics below are actually visible
        rootMsg.unMute()
        print("## Failed to open file [%s] !!" % fileName)
        print("## Reason:")
        print(e)
        print("## Bailing out...")
        raise IOError("Could not open file [%s]" % fileName) from e

    rootMsg.unMute()

    # A failed open may come back as a falsy null-pointer proxy rather than
    # None, so test truthiness instead of identity with None.
    if not poolFile:
        print("## Failed to open file [%s] !!" % fileName)
        msg = "Could not open file [%s]" % fileName
        raise IOError(msg)

    self.poolFile = poolFile
    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # the exception type is kept for existing callers.
    if not self.poolFile.IsOpen() or self.poolFile.IsZombie():
        raise AssertionError("Invalid POOL file or a Zombie one")
    self._fileInfos = {
        'name' : self.poolFile.GetName(),
        'size' : self.poolFile.GetSize(),
    }
    return
547

◆ __processFile()

python.PoolFile.PoolFile.__processFile ( self)
private

Definition at line 548 of file PoolFile.py.

def __processFile(self):
    """Scan the opened POOL file and build the size records.

    Fills:

    * ``self.keys``: one key per distinct container name, sorted by key name;
    * ``self.dataHeaderA``: one ``DataHeader`` record per key whose name
      starts with ``PoolOpts.POOL_HEADER`` and does not end with ``'Form'``,
      indexed by augmentation name;
    * ``self.dataHeader``: the record of the container for which
      ``PoolOpts.isDataHeader`` is true;
    * ``self.data`` / ``self.augNames``: one record per branch (TTree) or
      field (RNTuple) of every container for which ``PoolOpts.isData`` is
      true, and the set of their augmentation names.

    Raises:
        NotImplementedError: if a key holds neither a TTree nor an RNTuple.
    """
    ROOT = self.ROOT

    def _rntupleClass(clsName):
        # Look the class up under ROOT.Experimental first, then directly
        # under ROOT. Only the lookup is guarded, so an AttributeError
        # raised while *using* the class is not mistaken for a missing one.
        try:
            return getattr(ROOT.Experimental, clsName)
        except AttributeError:
            # ROOT 6.36 and later
            return getattr(ROOT, clsName)

    # First pass: keep one key per container and record the DataHeader
    # entry counts of every (augmentation) stream.
    keys = []
    containers = []
    for k in self.poolFile.GetListOfKeys():
        keyname = k.GetName()
        obj = self.poolFile.Get( keyname )
        if isinstance(obj, ROOT.TTree):
            containerName = obj.GetName()
            nEntries = obj.GetEntries()
            dirType = "T"
        elif isRNTuple(obj):
            reader = _rntupleClass("RNTupleReader").Open(obj)
            containerName = reader.GetDescriptor().GetName()
            nEntries = reader.GetNEntries()
            dirType = "N"
        else:
            raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
        if containerName not in containers:
            keys.append(k)
            containers.append(containerName)
        if keyname.startswith(PoolOpts.POOL_HEADER) and not keyname.endswith('Form'):
            self.dataHeaderA[PoolOpts.augmentationName(keyname)] = \
                PoolRecord("DataHeader", 0, 0, 0,
                           nEntries = nEntries,
                           dirType = dirType)

    keys.sort(key = lambda x: x.GetName())
    self.keys = keys
    del containers

    # Second pass: size information for the header and data containers.
    for k in keys:
        obj = self.poolFile.Get( k.GetName() )
        # every retained key was checked above to be a TTree or an RNTuple
        if isinstance(obj, ROOT.TTree):
            name = obj.GetName()
        elif isRNTuple(obj):
            inspector = _rntupleClass("RNTupleInspector").Create(obj)
            name = inspector.GetDescriptor().GetName()

        if PoolOpts.isDataHeader(name):
            contName = "DataHeader"
            if isinstance(obj, ROOT.TTree):
                memSize = obj.GetTotBytes() / Units.kb
                diskSize = obj.GetZipBytes() / Units.kb
                memSizeNoZip = 0.0
                if diskSize < 0.001:
                    # (almost) nothing on disk: count it all as un-zipped
                    memSizeNoZip = memSize
                nEntries = obj.GetEntries()

                dhBranchNames = [
                    br.GetName() for br in obj.GetListOfBranches()
                    if "DataHeader_p" in br.GetName()
                ]
                if len(dhBranchNames) == 1:
                    # a single DataHeader_p* branch: size that branch alone
                    dhBranch = obj.GetBranch(dhBranchNames[0])
                    typeName = dhBranch.GetClassName()
                    if not typeName and (leaf := dhBranch.GetListOfLeaves().At(0)):
                        typeName = leaf.GetTypeName()
                    poolRecord = retrieveBranchInfos(
                        dhBranch,
                        PoolRecord( contName, 0., 0., 0.,
                                    nEntries,
                                    dirType = "T",
                                    typeName = typeName ),
                        ident = " "
                    )
                else:
                    poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
                                            nEntries,
                                            dirType = "T")

                self.dataHeader = poolRecord
            elif isRNTuple(obj):
                diskSize = inspector.GetCompressedSize() / Units.kb
                memSize = inspector.GetUncompressedSize() / Units.kb

                memSizeNoZip = 0.0
                if diskSize < 0.001:
                    memSizeNoZip = memSize
                nEntries = inspector.GetDescriptor().GetNEntries()
                poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
                                        nEntries,
                                        dirType = "N")
                self.dataHeader = poolRecord
        elif PoolOpts.isData(name):
            if isinstance(obj, ROOT.TTree):
                if not hasattr(obj, 'GetListOfBranches'):
                    continue
                branches = obj.GetListOfBranches()
                dirType = "T"
                if name in (PoolOpts.EVENT_DATA, PoolOpts.META_DATA):
                    dirType = "B"
                for branch in branches:
                    poolRecord = retrieveBranchInfos(
                        branch,
                        make_pool_record(branch, dirType),
                        ident = " "
                    )
                    poolRecord.augName = PoolOpts.augmentationName(name)
                    self.augNames.add(poolRecord.augName)
                    self.data.append(poolRecord)
            elif isRNTuple(obj):
                descriptor = inspector.GetDescriptor()
                dirType = "N"
                if name in {PoolOpts.RNTupleNames.EventData, PoolOpts.RNTupleNames.MetaData}:
                    dirType = "F"
                fieldZeroId = descriptor.GetFieldZeroId()
                for fieldDescriptor in descriptor.GetFieldIterable(fieldZeroId):
                    fieldId = fieldDescriptor.GetId()
                    fieldTreeInspector = inspector.GetFieldTreeInspector(fieldId)
                    diskSize = fieldTreeInspector.GetCompressedSize() / Units.kb
                    memSize = fieldTreeInspector.GetUncompressedSize() / Units.kb
                    typeName = fieldDescriptor.GetTypeName()
                    fieldName = fieldDescriptor.GetFieldName()
                    poolRecord = PoolRecord(fieldName, memSize, diskSize, memSize,
                                            descriptor.GetNEntries(),
                                            dirType=dirType,
                                            typeName=typeName)
                    poolRecord.augName = PoolOpts.augmentationName(name)
                    self.augNames.add(poolRecord.augName)
                    self.data.append(poolRecord)
    # loop over keys

    return
699
T * Get(TFile &f, const std::string &n, const std::string &dir="", const chainmap_t *chainmap=0, std::vector< std::string > *saved=0)
get a histogram given a path, and an optional initial directory if histogram is not found,...
bool add(const std::string &hname, TKey *tobj)
Definition fastadd.cxx:55
int count(std::string s, const std::string &regx)
count how many occurrences of a regx are in a string
Definition hcg.cxx:148

◆ checkFile()

python.PoolFile.PoolFile.checkFile ( self,
sorting = PoolRecord.Sorter.DiskSize )

Definition at line 708 of file PoolFile.py.

def checkFile(self, sorting = PoolRecord.Sorter.DiskSize):
    """Print a size summary table of the file's containers.

    ``self.data`` is sorted in place by the ``sorting`` attribute when that
    value is one of ``PoolRecord.Sorter.allowedValues()``. All printing
    happens only when ``self.verbose`` is exactly ``True``.

    Args:
        sorting: name of the record attribute to sort the rows by.
    """
    if self.verbose is True:
        print(self.fileInfos())
        if len(self.augNames) > 1:
            for aug in self.augNames:
                if len(aug) > 0:
                    print( "Nbr %s Events: %i" % (aug, self.dataHeaderA[aug].nEntries) )

    data = self.data
    if sorting in PoolRecord.Sorter.allowedValues():
        import operator
        data.sort(key = operator.attrgetter(sorting) )

    def _get_val(x, dflt=-999.):
        # in FAST_MODE the value is not meaningful: show a placeholder
        if PoolOpts.FAST_MODE:
            return dflt
        return x

    def _safe_div(num,den):
        # a zero denominator yields 0 rather than an exception
        if float(den) == 0.:
            return 0.
        return num/den

    header = self.dataHeader
    # every "Size/Evt" column is relative to the main DataHeader entry count
    nEvents = float(header.nEntries)

    totMemSize = _get_val(header.memSize, dflt=0.)
    totDiskSize = header.diskSize

    if self.verbose is True:
        print("")
        print("="*80)
        print(PoolOpts.HDR_FORMAT % ( "Mem Size", "Disk Size","Size/Evt",
                                      "MissZip/Mem","items",
                                      "(X) Container Name (X=Tree|Branch)" ))
        print("="*80)

        print(PoolOpts.ROW_FORMAT % (
            _get_val (header.memSize),
            header.diskSize,
            _safe_div(header.diskSize, nEvents),
            _get_val (_safe_div(header.memSizeNoZip,
                                header.memSize)),
            header.nEntries,
            "("+header.dirType+") "+header.name
        ))
        print("-"*80)

    totMemSizeA = {}
    totDiskSizeA = {}
    for d in data:
        totMemSize += 0. if PoolOpts.FAST_MODE else d.memSize
        totDiskSize += d.diskSize
        memSizeNoZip = _safe_div(d.memSizeNoZip, d.memSize)
        aug = d.augName
        totMemSizeA[aug] = totMemSizeA.get(aug,0.) + d.memSize
        totDiskSizeA[aug] = totDiskSizeA.get(aug,0.) + d.diskSize
        if self.verbose is True:
            print(PoolOpts.ROW_FORMAT % (
                _get_val (d.memSize),
                d.diskSize,
                _safe_div(d.diskSize, nEvents),
                _get_val (memSizeNoZip),
                d.nEntries,
                "("+d.dirType+") "+d.name
            ))

    if self.verbose is True:
        print("="*80)
        if len(self.augNames) > 1:
            # one summary row per augmentation stream
            for a in sorted(self.augNames):
                print(PoolOpts.ROW_FORMAT % (
                    totMemSizeA[a], totDiskSizeA[a],
                    _safe_div(totDiskSizeA[a], float(self.dataHeaderA[a].nEntries)),
                    0.0,
                    self.dataHeaderA[a].nEntries,
                    "Aug Stream: " + ('MAIN' if a=='' else a)
                ))
            print("-"*80)
        print(PoolOpts.ROW_FORMAT % (
            totMemSize, totDiskSize,
            _safe_div(totDiskSize, nEvents),
            0.0, header.nEntries,
            "TOTAL (POOL containers)"
        ))
        print("="*80)
        if PoolOpts.FAST_MODE:
            # One message on one line: the former trailing-comma print only
            # suppressed the newline under Python 2.
            print("::: warning: FAST_MODE was enabled: some columns' content "
                  "is meaningless...")
    return
797

◆ detailedDump()

python.PoolFile.PoolFile.detailedDump ( self,
bufferName = None )

Definition at line 798 of file PoolFile.py.

def detailedDump(self, bufferName = None ):
    """Write ROOT's own ``Print()`` output of each container to a file.

    Does nothing (beyond printing a message) when no ROOT file is attached,
    i.e. when ``self.poolFile`` or ``self.keys`` is ``None``.

    Args:
        bufferName: path of the output file; ``None`` means
            ``"/dev/stdout"``.
    """
    if self.poolFile is None or self.keys is None:
        print("Can't perform a detailedDump with a shelve file as input !")
        return

    if bufferName is None:
        bufferName = "/dev/stdout"
    with open( bufferName, "w" ) as out:
        sys.stdout.flush()
        # Redirect at file-descriptor level so that output printed outside
        # of Python's sys.stdout object also lands in `out`.
        save_stdout_fileno = os.dup (sys.stdout.fileno())
        os.dup2( out.fileno(), sys.stdout.fileno() )
        try:
            out.write( "#" * 80 + os.linesep )
            out.write( "## detailed dump" + os.linesep )
            out.flush()

            for key in self.keys:
                tree = key.ReadObj()
                name = tree.GetName()

                if PoolOpts.isDataHeader(name) or \
                   PoolOpts.isData(name):
                    try:
                        print ("=== [%s] ===" % name, file=sys.stderr)
                        tree.Print()
                    except Exception as err:
                        # best effort: report and move on to the next key
                        print ("Caught:",err, file=sys.stderr)
                        print (sys.exc_info()[0], file=sys.stderr)
                        print (sys.exc_info()[1], file=sys.stderr)
            out.write( "#" * 80 + os.linesep )
            out.flush()
            out.write( "#" * 80 + os.linesep )
        finally:
            # Put the original stdout descriptor back, even on error, and
            # release the duplicate taken above.
            out.flush()
            sys.stdout.flush()
            os.dup2( save_stdout_fileno, sys.stdout.fileno() )
            os.close( save_stdout_fileno )
    return

◆ fileInfos()

python.PoolFile.PoolFile.fileInfos ( self)

Definition at line 700 of file PoolFile.py.

def fileInfos(self):
    """Return a three-line summary: file name, size in kb and event count."""
    infos = self._fileInfos
    summary = (
        "File:" + infos['name'],
        "Size: %12.3f kb" % (infos['size'] / Units.kb),
        "Nbr Events: %i" % self.dataHeader.nEntries,
    )
    return os.linesep.join(summary)
706
707

Member Data Documentation

◆ _fileInfos

dict python.PoolFile.PoolFile._fileInfos = None
protected

Definition at line 465 of file PoolFile.py.

◆ augNames

python.PoolFile.PoolFile.augNames = set()

Definition at line 470 of file PoolFile.py.

◆ data

list python.PoolFile.PoolFile.data = []

Definition at line 472 of file PoolFile.py.

◆ dataHeader

python.PoolFile.PoolFile.dataHeader
Initial value:
= PoolRecord("DataHeader", 0, 0, 0,
nEntries = 0,
dirType = "T")

Definition at line 467 of file PoolFile.py.

◆ dataHeaderA

dict python.PoolFile.PoolFile.dataHeaderA = {}

Definition at line 471 of file PoolFile.py.

◆ keys

python.PoolFile.PoolFile.keys = None

Definition at line 466 of file PoolFile.py.

◆ poolFile

python.PoolFile.PoolFile.poolFile = None

Definition at line 484 of file PoolFile.py.

◆ ROOT

python.PoolFile.PoolFile.ROOT = ROOT

Definition at line 513 of file PoolFile.py.

◆ verbose

python.PoolFile.PoolFile.verbose = verbose

Definition at line 473 of file PoolFile.py.


The documentation for this class was generated from the following file: