ATLAS Offline Software
Loading...
Searching...
No Matches
python.PoolFile.PoolFile Class Reference
Inheritance diagram for python.PoolFile.PoolFile:
Collaboration diagram for python.PoolFile.PoolFile:

Public Types

typedef HLT::TypeInformation::for_each_type_c< typenameEDMLIST::map, my_functor, my_result<>, my_arg< HLT::TypeInformation::get_cont, CONTAINER > >::type result

Public Member Functions

 __init__ (self, fileName, verbose=True)
 fileInfos (self)
 checkFile (self, sorting=PoolRecord.Sorter.DiskSize)
 detailedDump (self, bufferName=None)

Public Attributes

 keys = None
 dataHeader
 augNames = set()
dict dataHeaderA = {}
list data = []
 verbose = verbose
 poolFile = None
 ROOT = ROOT

Protected Attributes

dict _fileInfos = None

Private Member Functions

 __openPoolFile (self, fileName)
 __processFile (self)

Detailed Description

A simple class to retrieve information about the content of a POOL file.
It should be abstracted from the underlying technology used to create this
POOL file (Db, ROOT,...).
Right now, we are using the easy and lousy solution: going straight to the
ROOT 'API'.

Definition at line 492 of file PoolFile.py.

Member Typedef Documentation

◆ result

Definition at line 90 of file EDM_MasterSearch.h.

Constructor & Destructor Documentation

◆ __init__()

python.PoolFile.PoolFile.__init__ ( self,
fileName,
verbose = True )

Definition at line 501 of file PoolFile.py.

def __init__(self, fileName, verbose=True):
    """Collect the content summary of a POOL file or of a saved report.

    The file name is first resolved through ``file_name``.  If ``whichdb``
    recognises the result as a database, the pre-computed ``'report'``
    entry is loaded from it with ``shelve``; otherwise the file is opened
    with ROOT and scanned.

    Args:
        fileName: name of the POOL file, or of a shelve file holding a report.
        verbose: when exactly ``True``, progress messages are printed.
    """
    object.__init__(self)

    self._fileInfos = None
    self.keys = None
    self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
                                 nEntries = 0,
                                 dirType = "T")
    self.augNames = set()
    self.dataHeaderA = {}
    self.data = []
    self.verbose = verbose

    # get the "final" file name (handles all kind of protocols)
    try:
        _, fileName = file_name(fileName)
    except Exception as err:
        # Best effort: report the problem and carry on with the name as given.
        print("## warning: problem opening PoolFileCatalog:\n%s"%err)
        import traceback
        # print_exc() takes (limit, file, chain); it picks up the active
        # exception by itself, so the exception must not be passed in.
        traceback.print_exc()

    self.poolFile = None
    dbFileName = whichdb( fileName )
    if dbFileName not in ( None, '' ):
        if self.verbose is True:
            print("## opening file [%s]..." % str(fileName))
        # The context manager closes the shelve once the report is read.
        with shelve.open( fileName, 'r' ) as db:
            if self.verbose is True:
                print("## opening file [OK]")
            report = db['report']
            self._fileInfos = report['fileInfos']
            self.dataHeader = report['dataHeader']
            self.data = report['data']
    else:
        if self.verbose is True:
            print("## opening file [%s]..." % str(fileName))
        self.__openPoolFile( fileName )
        if self.verbose is True:
            print("## opening file [OK]")
        self.__processFile()

    return
544
void print(char *figname, TCanvas *c1)
STL class.

Member Function Documentation

◆ __openPoolFile()

python.PoolFile.PoolFile.__openPoolFile ( self,
fileName )
private

Definition at line 545 of file PoolFile.py.

def __openPoolFile(self, fileName):
    """Open ``fileName`` with ROOT and record its name and size.

    On success ``self.ROOT``, ``self.poolFile`` and ``self._fileInfos``
    (keys ``'name'`` and ``'size'``) are set.

    Args:
        fileName: name handed to ``ROOT.TFile.Open``.

    Raises:
        IOError: if ``ROOT.TFile.Open`` raises or yields no usable handle.
        AssertionError: if the returned file is not open or is a zombie.
    """
    # hack to prevent ROOT from loading graphic libraries and hence bother
    # our fellow Mac users
    if self.verbose is True:
        print("## importing ROOT...")
    import PyUtils.RootUtils as ru
    ROOT = ru.import_root()
    self.ROOT = ROOT
    if self.verbose is True:
        print("## importing ROOT... [DONE]")
    # prevent ROOT from being too verbose
    rootMsg = ShutUp()
    rootMsg.mute()
    ROOT.gErrorIgnoreLevel = ROOT.kFatal

    poolFile = None
    try:
        poolFile = ROOT.TFile.Open( fileName, PoolOpts.READ_MODE )
    except Exception as e:
        # un-mute first, so that the diagnostics below are actually shown
        rootMsg.unMute()
        print("## Failed to open file [%s] !!" % fileName)
        print("## Reason:")
        print(e)
        print("## Bailing out...")
        # keep the original error attached as the cause
        raise IOError("Could not open file [%s]" % fileName) from e

    rootMsg.unMute()

    # A failed TFile.Open hands back a null-pointer proxy in PyROOT: it is
    # falsy but it is not None, so test truthiness rather than identity.
    if not poolFile:
        print("## Failed to open file [%s] !!" % fileName)
        msg = "Could not open file [%s]" % fileName
        raise IOError(msg)

    self.poolFile = poolFile
    assert self.poolFile.IsOpen() and not self.poolFile.IsZombie(), \
        "Invalid POOL file or a Zombie one"
    self._fileInfos = {
        'name' : self.poolFile.GetName(),
        'size' : self.poolFile.GetSize(),
    }
    return
586

◆ __processFile()

python.PoolFile.PoolFile.__processFile ( self)
private

Definition at line 587 of file PoolFile.py.

def __processFile(self):
    """Walk the keys of the opened POOL file and build the size records.

    Fills ``self.keys`` (one key per distinct container, sorted by key name),
    ``self.dataHeaderA`` (one record per augmentation header),
    ``self.dataHeader``, ``self.data`` and ``self.augNames``.

    Raises:
        NotImplementedError: if a key holds neither a TTree nor an RNTuple.
    """

    def _open_reader(ntuple):
        # Older ROOT releases keep the reader under ROOT.Experimental.
        try:
            return self.ROOT.Experimental.RNTupleReader.Open(ntuple)
        except AttributeError:
            # ROOT 6.36 and later
            return self.ROOT.RNTupleReader.Open(ntuple)

    def _make_inspector(ntuple):
        # Same namespace fallback as for the reader.
        try:
            return self.ROOT.Experimental.RNTupleInspector.Create(ntuple)
        except AttributeError:
            return self.ROOT.RNTupleInspector.Create(ntuple)

    # Look for the DataHeader container under either of its known names.
    for name in {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}:
        dhKey = self.poolFile.FindKey( name )
        if dhKey:
            obj = self.poolFile.Get( name )
            if isinstance(obj, self.ROOT.TTree):
                nEntries = obj.GetEntries()
            elif isRNTuple(obj):
                try:
                    nEntries = self.ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
                except AttributeError:
                    # ROOT 6.36 and later
                    nEntries = self.ROOT.RNTupleReader.Open(obj).GetNEntries()
            else:
                raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
            break
    else:
        nEntries = 0

    # First pass: keep one key per container and note the augmentation headers.
    unique_keys = []
    seen_containers = []
    for key in self.poolFile.GetListOfKeys():
        key_name = key.GetName()
        obj = self.poolFile.Get( key_name )
        if isinstance(obj, self.ROOT.TTree):
            container = obj.GetName()
            nEntries = obj.GetEntries()
            dirType = "T"
        elif isRNTuple(obj):
            reader = _open_reader(obj)
            container = reader.GetDescriptor().GetName()
            nEntries = reader.GetNEntries()
            dirType = "N"
        else:
            raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")

        if container not in seen_containers:
            unique_keys.append(key)
            seen_containers.append(container)

        is_header = key_name.startswith(PoolOpts.POOL_HEADER)
        if is_header and not key_name.endswith('Form'):
            header_record = PoolRecord("DataHeader", 0, 0, 0,
                                       nEntries = nEntries,
                                       dirType = dirType)
            self.dataHeaderA[PoolOpts.augmentationName(key_name)] = header_record

    unique_keys.sort(key = lambda entry: entry.GetName())
    self.keys = unique_keys
    del seen_containers

    # Second pass: build the size records, container by container.
    for key in unique_keys:
        obj = self.poolFile.Get( key.GetName() )
        if isinstance(obj, self.ROOT.TTree):
            name = obj.GetName()
        elif isRNTuple(obj):
            inspector = _make_inspector(obj)
            name = inspector.GetDescriptor().GetName()

        if PoolOpts.isDataHeader(name):
            contName = "DataHeader"
            if isinstance(obj, self.ROOT.TTree):
                memSize = obj.GetTotBytes() / Units.kb
                diskSize = obj.GetZipBytes() / Units.kb
                # a (nearly) empty on-disk size is counted as not compressed
                memSizeNoZip = memSize if diskSize < 0.001 else 0.0
                nEntries = obj.GetEntries()

                dhBranchNames = [
                    br.GetName() for br in obj.GetListOfBranches()
                    if br.GetName().count("DataHeader_p") > 0
                ]
                if len(dhBranchNames) == 1:
                    dhBranch = obj.GetBranch(dhBranchNames[0])
                    typeName = dhBranch.GetClassName()
                    # no class name: fall back to the type of the first leaf
                    if not typeName and (leaf := dhBranch.GetListOfLeaves().At(0)):
                        typeName = leaf.GetTypeName()
                    seed = PoolRecord( contName, 0., 0., 0.,
                                       nEntries,
                                       dirType = "T",
                                       typeName = typeName )
                    poolRecord = retrieveBranchInfos(dhBranch, seed, ident = " ")
                else:
                    poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
                                            nEntries,
                                            dirType = "T")

                self.dataHeader = poolRecord
            elif isRNTuple(obj):
                diskSize = inspector.GetCompressedSize() / Units.kb
                memSize = inspector.GetUncompressedSize() / Units.kb

                memSizeNoZip = memSize if diskSize < 0.001 else 0.0
                nEntries = inspector.GetDescriptor().GetNEntries()
                self.dataHeader = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
                                             nEntries,
                                             dirType = "N")
        elif PoolOpts.isData(name):
            if isinstance(obj, self.ROOT.TTree):
                if not hasattr(obj, 'GetListOfBranches'):
                    continue
                branches = obj.GetListOfBranches()
                if name in (PoolOpts.EVENT_DATA, PoolOpts.META_DATA):
                    dirType = "B"
                else:
                    dirType = "T"
                for branch in branches:
                    poolRecord = retrieveBranchInfos(
                        branch,
                        make_pool_record(branch, dirType),
                        ident = " "
                    )
                    poolRecord.augName = PoolOpts.augmentationName(name)
                    self.augNames.add(poolRecord.augName)
                    self.data += [ poolRecord ]
            elif isRNTuple(obj):
                descriptor = inspector.GetDescriptor()
                if name in {PoolOpts.RNTupleNames.EventData, PoolOpts.RNTupleNames.MetaData}:
                    dirType = "F"
                else:
                    dirType = "N"
                fieldZeroId = descriptor.GetFieldZeroId()
                for fieldDescriptor in descriptor.GetFieldIterable(fieldZeroId):
                    fieldInspector = inspector.GetFieldTreeInspector(fieldDescriptor.GetId())
                    diskSize = fieldInspector.GetCompressedSize() / Units.kb
                    memSize = fieldInspector.GetUncompressedSize() / Units.kb
                    typeName = fieldDescriptor.GetTypeName()
                    fieldName = fieldDescriptor.GetFieldName()
                    poolRecord = PoolRecord(fieldName, memSize, diskSize, memSize,
                                            descriptor.GetNEntries(),
                                            dirType=dirType,
                                            typeName=typeName)
                    poolRecord.augName = PoolOpts.augmentationName(name)
                    self.augNames.add(poolRecord.augName)
                    self.data += [ poolRecord ]
    # loop over keys

    return
738
T * Get(TFile &f, const std::string &n, const std::string &dir="", const chainmap_t *chainmap=0, std::vector< std::string > *saved=0)
get a histogram given a path, and an optional initial directory if histogram is not found,...
bool add(const std::string &hname, TKey *tobj)
Definition fastadd.cxx:55
int count(std::string s, const std::string &regx)
count how many occurrences of a regex are in a string
Definition hcg.cxx:146

◆ checkFile()

python.PoolFile.PoolFile.checkFile ( self,
sorting = PoolRecord.Sorter.DiskSize )

Definition at line 747 of file PoolFile.py.

def checkFile(self, sorting = PoolRecord.Sorter.DiskSize):
    """Print the per-container size table of the file.

    ``self.data`` is sorted in place whenever ``sorting`` is one of
    ``PoolRecord.Sorter.allowedValues()``.  Everything is printed only when
    ``self.verbose`` is exactly ``True``.

    Args:
        sorting: name of the record attribute to sort on.
    """
    if self.verbose is True:
        print(self.fileInfos())
        if len(self.augNames) > 1:
            for aug in self.augNames:
                if len(aug) > 0:
                    print( "Nbr %s Events: %i" % (aug, self.dataHeaderA[aug].nEntries) )

    # sort the records (in place, so self.data is reordered as well)
    data = self.data
    if sorting in PoolRecord.Sorter.allowedValues():
        import operator
        data.sort(key = operator.attrgetter(sorting) )

    def _get_val(x, dflt=-999.):
        # in FAST_MODE the memory-size figures are replaced by a placeholder
        if PoolOpts.FAST_MODE:
            return dflt
        return x

    totMemSize = _get_val(self.dataHeader.memSize, dflt=0.)
    totDiskSize = self.dataHeader.diskSize

    def _safe_div(num,den):
        # a zero denominator yields 0. instead of raising
        if float(den) == 0.:
            return 0.
        return num/den

    if self.verbose is True:
        print("")
        print("="*80)
        print(PoolOpts.HDR_FORMAT % ( "Mem Size", "Disk Size","Size/Evt",
                                      "MissZip/Mem","items",
                                      "(X) Container Name (X=Tree|Branch)" ))
        print("="*80)

        print(PoolOpts.ROW_FORMAT % (
            _get_val (self.dataHeader.memSize),
            self.dataHeader.diskSize,
            _safe_div(self.dataHeader.diskSize,float(self.dataHeader.nEntries)),
            _get_val (_safe_div(self.dataHeader.memSizeNoZip,
                                self.dataHeader.memSize)),
            self.dataHeader.nEntries,
            "("+self.dataHeader.dirType+") "+self.dataHeader.name
            ))
        print("-"*80)

    # running totals, overall and per augmentation stream
    totMemSizeA = {}
    totDiskSizeA = {}
    for d in data:
        totMemSize += 0. if PoolOpts.FAST_MODE else d.memSize
        totDiskSize += d.diskSize
        memSizeNoZip = _safe_div(d.memSizeNoZip, d.memSize)
        aug = d.augName
        totMemSizeA[aug] = totMemSizeA.get(aug,0.) + d.memSize
        totDiskSizeA[aug] = totDiskSizeA.get(aug,0.) + d.diskSize
        if self.verbose is True:
            print(PoolOpts.ROW_FORMAT % (
                _get_val (d.memSize),
                d.diskSize,
                _safe_div(d.diskSize, float(self.dataHeader.nEntries)),
                _get_val (memSizeNoZip),
                d.nEntries,
                "("+d.dirType+") "+d.name
                ))

    if self.verbose is True:
        print("="*80)
        if len(self.augNames) > 1:
            augs = sorted(self.augNames)
            for a in augs:
                print(PoolOpts.ROW_FORMAT % (
                    totMemSizeA[a], totDiskSizeA[a],
                    _safe_div(totDiskSizeA[a], float(self.dataHeaderA[a].nEntries)),
                    0.0,
                    self.dataHeaderA[a].nEntries,
                    "Aug Stream: " + ('MAIN' if a=='' else a)
                    ))
            print("-"*80)
        print(PoolOpts.ROW_FORMAT % (
            totMemSize, totDiskSize,
            _safe_div(totDiskSize, float(self.dataHeader.nEntries)),
            0.0, self.dataHeader.nEntries,
            "TOTAL (POOL containers)"
            ))
        print("="*80)
        if PoolOpts.FAST_MODE:
            # One call: the old trailing-comma trick no longer suppresses
            # the newline, which split this warning over two lines.
            print("::: warning: FAST_MODE was enabled: some columns' content "
                  "is meaningless...")
    return
836

◆ detailedDump()

python.PoolFile.PoolFile.detailedDump ( self,
bufferName = None )

Definition at line 837 of file PoolFile.py.

def detailedDump(self, bufferName = None ):
    """Write ROOT's ``Print()`` output of every data container to a file.

    The process-level stdout descriptor is pointed at the output file while
    the containers are printed, and put back afterwards.  Section titles
    and errors are reported on ``sys.stderr``.

    Args:
        bufferName: name of the output file; ``None`` means "/dev/stdout".
    """
    if self.poolFile is None or \
       self.keys is None:
        print("Can't perform a detailedDump with a shelve file as input !")
        return

    if bufferName is None:
        bufferName = "/dev/stdout"

    with open( bufferName, "w" ) as out:
        sys.stdout.flush()
        stdout_fileno = sys.stdout.fileno()
        save_stdout_fileno = os.dup( stdout_fileno )
        try:
            os.dup2( out.fileno(), stdout_fileno )

            out.write( "#" * 80 + os.linesep )
            out.write( "## detailed dump" + os.linesep )
            out.flush()

            for key in self.keys:
                tree = key.ReadObj()
                name = tree.GetName()

                if PoolOpts.isDataHeader(name) or \
                   PoolOpts.isData(name):
                    try:
                        print ("=== [%s] ===" % name, file=sys.stderr)
                        tree.Print()
                    except Exception as err:
                        print ("Caught:",err, file=sys.stderr)
                        print (sys.exc_info()[0], file=sys.stderr)
                        print (sys.exc_info()[1], file=sys.stderr)
            out.write( "#" * 80 + os.linesep )
            out.flush()
            out.write( "#" * 80 + os.linesep )
            out.flush()
        finally:
            # Push out anything still sitting in Python's or the C library's
            # buffers before the descriptor is switched back, otherwise it
            # would land on the restored stream.
            sys.stdout.flush()
            try:
                import ctypes
                ctypes.CDLL(None).fflush(None)
            except (OSError, AttributeError, TypeError):
                pass  # best effort only: no C runtime handle available
            # undo the redirection and release the duplicated descriptor
            os.dup2( save_stdout_fileno, stdout_fileno )
            os.close( save_stdout_fileno )
    return

◆ fileInfos()

python.PoolFile.PoolFile.fileInfos ( self)

Definition at line 739 of file PoolFile.py.

def fileInfos(self):
    """Return the file name, its size and its event count, one per line."""
    infos = self._fileInfos
    name_line = "File:" + infos['name']
    size_line = "Size: %12.3f kb" % (infos['size'] / Units.kb)
    events_line = "Nbr Events: %i" % self.dataHeader.nEntries
    return os.linesep.join( (name_line, size_line, events_line) )
745
746

Member Data Documentation

◆ _fileInfos

dict python.PoolFile.PoolFile._fileInfos = None
protected

Definition at line 504 of file PoolFile.py.

◆ augNames

python.PoolFile.PoolFile.augNames = set()

Definition at line 509 of file PoolFile.py.

◆ data

list python.PoolFile.PoolFile.data = []

Definition at line 511 of file PoolFile.py.

◆ dataHeader

python.PoolFile.PoolFile.dataHeader
Initial value:
= PoolRecord("DataHeader", 0, 0, 0,
nEntries = 0,
dirType = "T")

Definition at line 506 of file PoolFile.py.

◆ dataHeaderA

dict python.PoolFile.PoolFile.dataHeaderA = {}

Definition at line 510 of file PoolFile.py.

◆ keys

python.PoolFile.PoolFile.keys = None

Definition at line 505 of file PoolFile.py.

◆ poolFile

python.PoolFile.PoolFile.poolFile = None

Definition at line 523 of file PoolFile.py.

◆ ROOT

python.PoolFile.PoolFile.ROOT = ROOT

Definition at line 552 of file PoolFile.py.

◆ verbose

python.PoolFile.PoolFile.verbose = verbose

Definition at line 512 of file PoolFile.py.


The documentation for this class was generated from the following file: