ATLAS Offline Software
Loading...
Searching...
No Matches
python.PoolFile.PoolFile Class Reference
Inheritance diagram for python.PoolFile.PoolFile:
Collaboration diagram for python.PoolFile.PoolFile:

Public Types

typedef HLT::TypeInformation::for_each_type_c< typenameEDMLIST::map, my_functor, my_result<>, my_arg< HLT::TypeInformation::get_cont, CONTAINER > >::type result

Public Member Functions

 __init__ (self, fileName, verbose=True)
 fileInfos (self)
 checkFile (self, sorting=PoolRecord.Sorter.DiskSize)
 detailedDump (self, bufferName=None)

Public Attributes

 keys = None
 dataHeader
 augNames = set()
dict dataHeaderA = {}
list data = []
 verbose = verbose
 poolFile = None
 ROOT = ROOT

Protected Attributes

dict _fileInfos = None

Private Member Functions

 __openPoolFile (self, fileName)
 __processFile (self)

Detailed Description

A simple class to retrieve information about the content of a POOL file.
It should be abstracted from the underlying technology used to create this
POOL file (Db, ROOT,...).
Right now, we are using the easy and lousy solution: going straight to the
ROOT 'API'.

Definition at line 487 of file PoolFile.py.

Member Typedef Documentation

◆ result

Definition at line 90 of file EDM_MasterSearch.h.

Constructor & Destructor Documentation

◆ __init__()

python.PoolFile.PoolFile.__init__ ( self,
fileName,
verbose = True )

Definition at line 496 of file PoolFile.py.

def __init__(self, fileName, verbose=True):
    """Load the content summary of a POOL file, or of a saved report.

    The name is first resolved through ``file_name()``. If ``whichdb``
    recognises the resolved name as a database, it is opened as a shelve
    and its ``'report'`` entry supplies ``_fileInfos``, ``dataHeader`` and
    ``data``. Otherwise the file is opened through ROOT and scanned.

    Args:
        fileName: name of the input file.
        verbose: progress messages are printed only when this is exactly
            ``True``.

    Raises:
        IOError: from ``__openPoolFile`` when the file cannot be opened.
    """
    object.__init__(self)

    self._fileInfos = None
    self.keys = None
    self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
                                 nEntries = 0,
                                 dirType = "T")
    self.augNames = set()
    self.dataHeaderA = {}
    self.data = []
    self.verbose = verbose

    # get the "final" file name (handles all kind of protocols)
    try:
        _protocol, fileName = file_name(fileName)
    except Exception as err:
        # Best effort: report the problem and carry on with the name as given.
        print("## warning: problem opening PoolFileCatalog:\n%s"%err)
        import traceback
        # print_exc() reports the exception currently being handled; its
        # first positional parameter is `limit`, not an exception object.
        traceback.print_exc()

    self.poolFile = None
    dbFileName = whichdb( fileName )
    if self.verbose is True:
        print("## opening file [%s]..." % str(fileName))
    if dbFileName not in ( None, '' ):
        # Saved report: read what is needed, then release the database.
        with shelve.open( fileName, 'r' ) as db:
            if self.verbose is True:
                print("## opening file [OK]")
            report = db['report']
        self._fileInfos = report['fileInfos']
        self.dataHeader = report['dataHeader']
        self.data = report['data']
    else:
        self.__openPoolFile( fileName )
        if self.verbose is True:
            print("## opening file [OK]")
        self.__processFile()

    return
void print(char *figname, TCanvas *c1)
STL class.

Member Function Documentation

◆ __openPoolFile()

python.PoolFile.PoolFile.__openPoolFile ( self,
fileName )
private

Definition at line 540 of file PoolFile.py.

def __openPoolFile(self, fileName):
    """Open `fileName` through ROOT and record its name and size.

    Sets ``self.ROOT``, ``self.poolFile`` and ``self._fileInfos`` (keys
    ``'name'`` and ``'size'``). The file is opened with
    ``PoolOpts.READ_MODE`` while ROOT's messages are muted.

    Args:
        fileName: name of the file handed to ``ROOT.TFile.Open``.

    Raises:
        IOError: if ROOT raises while opening, or yields no usable file.
        AssertionError: if the returned file is not open or is a zombie.
    """
    # hack to prevent ROOT from loading graphic libraries and hence bother
    # our fellow Mac users
    if self.verbose is True:
        print("## importing ROOT...")
    import PyUtils.RootUtils as ru
    ROOT = ru.import_root()
    self.ROOT = ROOT
    if self.verbose is True:
        print("## importing ROOT... [DONE]")
    # prevent ROOT from being too verbose
    rootMsg = ShutUp()
    rootMsg.mute()
    ROOT.gErrorIgnoreLevel = ROOT.kFatal

    try:
        poolFile = ROOT.TFile.Open( fileName, PoolOpts.READ_MODE )
    except Exception as e:
        # Un-mute first so the diagnostics below are actually shown.
        rootMsg.unMute()
        print("## Failed to open file [%s] !!" % fileName)
        print("## Reason:")
        print(e)
        print("## Bailing out...")
        raise IOError("Could not open file [%s]" % fileName) from e

    rootMsg.unMute()

    # A failed open may come back as a null TFile proxy rather than None;
    # such a proxy is falsy, so test truthiness instead of identity.
    # NOTE(review): confirm against the PyROOT version in use.
    if not poolFile:
        print("## Failed to open file [%s] !!" % fileName)
        msg = "Could not open file [%s]" % fileName
        raise IOError(msg)

    self.poolFile = poolFile
    assert self.poolFile.IsOpen() and not self.poolFile.IsZombie(), \
        "Invalid POOL file or a Zombie one"
    self._fileInfos = {
        'name' : self.poolFile.GetName(),
        'size' : self.poolFile.GetSize(),
    }
    return

◆ __processFile()

python.PoolFile.PoolFile.__processFile ( self)
private

Definition at line 582 of file PoolFile.py.

582 def __processFile(self):
# Scan the keys of self.poolFile and fill self.keys, self.dataHeader,
# self.dataHeaderA, self.data and self.augNames.
# Objects that are TTrees or RNTuples are handled; in the first two loops any
# other object type raises NotImplementedError.
583
# Step 1: entry count of the DataHeader object, looked up under its TTree and
# RNTuple names.
# NOTE(review): the nEntries obtained here appears to be reassigned by the
# later loops before it is ever read -- confirm whether this step is still needed.
584 for name in {PoolOpts.TTreeNames.DataHeader, PoolOpts.RNTupleNames.DataHeader}:
585 dhKey = self.poolFile.FindKey( name )
586 if dhKey:
587 obj = self.poolFile.Get( name )
588 if isinstance(obj, self.ROOT.TTree):
589 nEntries = obj.GetEntries()
590 elif isRNTuple(obj):
# Try the ROOT.Experimental location of RNTupleReader first, then the
# top-level one.
591 try:
592 nEntries = self.ROOT.Experimental.RNTupleReader.Open(obj).GetNEntries()
593 except AttributeError:
594 # ROOT 6.36 and later
595 nEntries = self.ROOT.RNTupleReader.Open(obj).GetNEntries()
596 else:
597 raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
598 break
599 else:
600 nEntries = 0
601
# Step 2: collect one key per distinct container name, and register a
# "DataHeader" PoolRecord (keyed by augmentation name) for key names that
# start with PoolOpts.POOL_HEADER and do not end in 'Form'.
# dirType is "T" for a TTree and "N" for an RNTuple.
602 keys = []
603 containers = []
604 for k in self.poolFile.GetListOfKeys():
605 keyname = k.GetName()
606 obj = self.poolFile.Get( keyname )
607 if isinstance(obj, self.ROOT.TTree):
608 containerName = obj.GetName()
609 nEntries = obj.GetEntries()
610 dirType = "T"
611 elif isRNTuple(obj):
612 try:
613 reader = self.ROOT.Experimental.RNTupleReader.Open(obj)
614 except AttributeError:
615 # ROOT 6.36 and later
616 reader = self.ROOT.RNTupleReader.Open(obj)
617 containerName = reader.GetDescriptor().GetName()
618 nEntries = reader.GetNEntries()
619 dirType = "N"
620 else:
621 raise NotImplementedError(f"Keys of type {type(obj)!r} not supported")
622 if containerName not in containers:
623 keys.append(k)
624 containers.append(containerName)
625 pass
626 if keyname.startswith(PoolOpts.POOL_HEADER) and not keyname.endswith('Form'):
627 self.dataHeaderA[PoolOpts.augmentationName(keyname)] = \
628 PoolRecord("DataHeader", 0, 0, 0,
629 nEntries = nEntries,
630 dirType = dirType)
631
# The retained keys are stored sorted by key name.
632 keys.sort (key = lambda x: x.GetName())
633 self.keys = keys
634 del containers
635
# Step 3: build the size records. The DataHeader object feeds self.dataHeader;
# data containers add one record per branch (TTree) or per field returned by
# GetFieldIterable(fieldZeroId) (RNTuple) to self.data.
636 for k in keys:
637 obj = self.poolFile.Get( k.GetName() )
638 if isinstance(obj, self.ROOT.TTree):
639 name = obj.GetName()
640 elif isRNTuple(obj):
# Same two-location lookup as for RNTupleReader, here for RNTupleInspector.
641 try:
642 inspector = self.ROOT.Experimental.RNTupleInspector.Create(obj)
643 except AttributeError:
644 inspector = self.ROOT.RNTupleInspector.Create(obj)
645 name = inspector.GetDescriptor().GetName()
646
647 if PoolOpts.isDataHeader(name):
648 contName = "DataHeader"
649 if isinstance(obj, self.ROOT.TTree):
# Sizes are divided by Units.kb.
650 memSize = obj.GetTotBytes() / Units.kb
651 diskSize = obj.GetZipBytes() / Units.kb
652 memSizeNoZip = 0.0
# NOTE(review): a near-zero zipped size looks like it is taken to mean the
# content is stored uncompressed -- confirm.
653 if diskSize < 0.001:
654 memSizeNoZip = memSize
655 nEntries = obj.GetEntries()
656
# With exactly one branch whose name contains "DataHeader_p", the record is
# built from that branch via retrieveBranchInfos; otherwise the tree-level
# sizes computed above are used.
658 dhBranchNames = [
659 br.GetName() for br in obj.GetListOfBranches()
660 if br.GetName().count("DataHeader_p") > 0
661 ]
662 if len(dhBranchNames) == 1:
663 dhBranch = obj.GetBranch(dhBranchNames[0])
664 typeName = dhBranch.GetClassName()
# Fall back to the type name of the branch's first leaf when the branch
# reports no class name.
665 if not typeName and (leaf := dhBranch.GetListOfLeaves().At(0)):
666 typeName = leaf.GetTypeName()
667 poolRecord = retrieveBranchInfos(
668 dhBranch,
669 PoolRecord( contName, 0., 0., 0.,
670 nEntries,
671 dirType = "T",
672 typeName = typeName ),
673 ident = " "
674 )
675 else:
676 poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
677 nEntries,
678 dirType = "T")
679
680 self.dataHeader = poolRecord
681 elif isRNTuple(obj):
682 diskSize = inspector.GetCompressedSize() / Units.kb
683 memSize = inspector.GetUncompressedSize() / Units.kb
684
685 memSizeNoZip = 0.0
686 if diskSize < 0.001:
687 memSizeNoZip = memSize
688 nEntries = inspector.GetDescriptor().GetNEntries()
689 poolRecord = PoolRecord(contName, memSize, diskSize, memSizeNoZip,
690 nEntries,
691 dirType = "N")
692 self.dataHeader = poolRecord
693 elif PoolOpts.isData(name):
694 if isinstance(obj, self.ROOT.TTree):
695 if not hasattr(obj, 'GetListOfBranches'):
696 continue
697 branches = obj.GetListOfBranches()
# dirType is "B" for the PoolOpts.EVENT_DATA / PoolOpts.META_DATA trees,
# "T" otherwise.
698 dirType = "T"
699 if name in (PoolOpts.EVENT_DATA, PoolOpts.META_DATA):
700 dirType = "B"
701 for branch in branches:
702 poolRecord = retrieveBranchInfos(
703 branch,
704 make_pool_record(branch, dirType),
705 ident = " "
706 )
# Tag the record with its augmentation name and remember that name.
707 poolRecord.augName = PoolOpts.augmentationName(name)
708 self.augNames.add(poolRecord.augName)
709 self.data += [ poolRecord ]
710 elif isRNTuple(obj):
711 descriptor = inspector.GetDescriptor()
# dirType is "F" for the RNTupleNames.EventData / MetaData ntuples,
# "N" otherwise.
712 dirType = "N"
713 if name in {PoolOpts.RNTupleNames.EventData, PoolOpts.RNTupleNames.MetaData}:
714 dirType = "F"
715 fieldZeroId = descriptor.GetFieldZeroId()
716 for fieldDescriptor in descriptor.GetFieldIterable(fieldZeroId):
717 fieldId = fieldDescriptor.GetId()
718 fieldTreeInspector = inspector.GetFieldTreeInspector(fieldId)
719 diskSize = fieldTreeInspector.GetCompressedSize() / Units.kb
720 memSize = fieldTreeInspector.GetUncompressedSize() / Units.kb
721 typeName = fieldDescriptor.GetTypeName()
722 fieldName = fieldDescriptor.GetFieldName()
# memSize is passed both as the memory size and as the fourth (no-zip)
# argument here.
723 poolRecord = PoolRecord(fieldName, memSize, diskSize, memSize,
724 descriptor.GetNEntries(),
725 dirType=dirType,
726 typeName=typeName)
727 poolRecord.augName = PoolOpts.augmentationName(name)
728 self.augNames.add(poolRecord.augName)
729 self.data += [ poolRecord ]
730 # loop over keys
731
732 return
733
T * Get(TFile &f, const std::string &n, const std::string &dir="", const chainmap_t *chainmap=0, std::vector< std::string > *saved=0)
get a histogram given a path, and an optional initial directory if histogram is not found,...
bool add(const std::string &hname, TKey *tobj)
Definition fastadd.cxx:55
int count(std::string s, const std::string &regx)
count how many occurrences of a regx are in a string
Definition hcg.cxx:146

◆ checkFile()

python.PoolFile.PoolFile.checkFile ( self,
sorting = PoolRecord.Sorter.DiskSize )

Definition at line 742 of file PoolFile.py.

def checkFile(self, sorting = PoolRecord.Sorter.DiskSize):
    """Print a size table for the DataHeader and every data record.

    ``self.data`` is sorted in place on the ``sorting`` attribute when that
    value is one of ``PoolRecord.Sorter.allowedValues()``. Totals are
    accumulated in all cases, but output is produced only when
    ``self.verbose`` is exactly ``True``. When ``PoolOpts.FAST_MODE`` is
    set, memory-size columns show placeholder values.

    Args:
        sorting: name of the PoolRecord attribute to sort the records on.
    """
    if self.verbose is True:
        print(self.fileInfos())
        if len(self.augNames) > 1:
            for aug in self.augNames:
                if len(aug) > 0:
                    print( "Nbr %s Events: %i" % (aug, self.dataHeaderA[aug].nEntries) )

    # Sort the records in place (this reorders self.data itself).
    data = self.data
    if sorting in PoolRecord.Sorter.allowedValues():
        import operator
        data.sort(key = operator.attrgetter(sorting) )

    def _get_val(x, dflt=-999.):
        # In FAST_MODE the real value is replaced by the placeholder `dflt`.
        if PoolOpts.FAST_MODE:
            return dflt
        return x

    totMemSize  = _get_val(self.dataHeader.memSize, dflt=0.)
    totDiskSize = self.dataHeader.diskSize

    def _safe_div(num,den):
        # Division that yields 0. instead of raising on a zero denominator.
        if float(den) == 0.:
            return 0.
        return num/den

    if self.verbose is True:
        print("")
        print("="*80)
        print(PoolOpts.HDR_FORMAT % ( "Mem Size", "Disk Size","Size/Evt",
                                      "MissZip/Mem","items",
                                      "(X) Container Name (X=Tree|Branch)" ))
        print("="*80)

        print(PoolOpts.ROW_FORMAT % (
            _get_val (self.dataHeader.memSize),
            self.dataHeader.diskSize,
            _safe_div(self.dataHeader.diskSize,float(self.dataHeader.nEntries)),
            _get_val (_safe_div(self.dataHeader.memSizeNoZip,
                                self.dataHeader.memSize)),
            self.dataHeader.nEntries,
            "("+self.dataHeader.dirType+") "+self.dataHeader.name
            ))
        print("-"*80)

    # Per-augmentation-stream totals, keyed by augmentation name.
    totMemSizeA = {}
    totDiskSizeA = {}
    for d in data:
        totMemSize  += 0. if PoolOpts.FAST_MODE else d.memSize
        totDiskSize += d.diskSize
        memSizeNoZip = d.memSizeNoZip/d.memSize if d.memSize != 0. else 0.
        aug = d.augName
        totMemSizeA[aug] = totMemSizeA.get(aug,0.) + d.memSize
        totDiskSizeA[aug] = totDiskSizeA.get(aug,0.) + d.diskSize
        if self.verbose is True:
            # Size/Evt is relative to the DataHeader entry count.
            print(PoolOpts.ROW_FORMAT % (
                _get_val (d.memSize),
                d.diskSize,
                _safe_div(d.diskSize, float(self.dataHeader.nEntries)),
                _get_val (memSizeNoZip),
                d.nEntries,
                "("+d.dirType+") "+d.name
                ))

    if self.verbose is True:
        print("="*80)
        if len(self.augNames) > 1:
            augs = sorted(self.augNames)
            for a in augs:
                print(PoolOpts.ROW_FORMAT % (
                    totMemSizeA[a], totDiskSizeA[a],
                    _safe_div(totDiskSizeA[a], float(self.dataHeaderA[a].nEntries)),
                    0.0,
                    self.dataHeaderA[a].nEntries,
                    "Aug Stream: " + ('MAIN' if a=='' else a)
                    ))
            print("-"*80)
        print(PoolOpts.ROW_FORMAT % (
            totMemSize, totDiskSize,
            _safe_div(totDiskSize, float(self.dataHeader.nEntries)),
            0.0, self.dataHeader.nEntries,
            "TOTAL (POOL containers)"
            ))
        print("="*80)
        if PoolOpts.FAST_MODE:
            # One print call: the former trailing-comma form only joined
            # the two halves on a single line under Python 2.
            print("::: warning: FAST_MODE was enabled: some columns' content "
                  "is meaningless...")
    return

◆ detailedDump()

python.PoolFile.PoolFile.detailedDump ( self,
bufferName = None )

Definition at line 832 of file PoolFile.py.

832 def detailedDump(self, bufferName = None ):
# Write a "detailed dump" of the file: for every key in self.keys whose object
# name satisfies PoolOpts.isDataHeader or PoolOpts.isData, call the object's
# Print().
# bufferName: path of the output file; None selects "/dev/stdout".
# Only prints a message and returns when self.poolFile or self.keys is None.
# NOTE(review): in this listing the descriptor saved in save_stdout_fileno is
# never restored and `out` is never closed; the listing may be truncated --
# check the end of the function in PoolFile.py.
833 if self.poolFile is None or \
834 self.keys is None:
835 print("Can't perform a detailed dump with a shelve file as input !")
836 return
837
838 if bufferName is None:
839 bufferName = "/dev/stdout"
840 out = open( bufferName, "w" )
# Point the process-level stdout descriptor at `out`, keeping a duplicate of
# the original descriptor.
841 sys.stdout.flush()
842 save_stdout_fileno = os.dup (sys.stdout.fileno())
843 os.dup2( out.fileno(), sys.stdout.fileno() )
844
845 out.write( "#" * 80 + os.linesep )
846 out.write( "## detailed dump" + os.linesep )
847 out.flush()
848
849 for key in self.keys:
850 tree = key.ReadObj()
851 name = tree.GetName()
852
853 if PoolOpts.isDataHeader(name) or \
854 PoolOpts.isData(name):
# The section marker and any error report go to stderr, not to the dump.
855 try:
856 print ("=== [%s] ===" % name, file=sys.stderr)
857 tree.Print()
858 except Exception as err:
859 print ("Caught:",err, file=sys.stderr)
860 print (sys.exc_info()[0], file=sys.stderr)
861 print (sys.exc_info()[1], file=sys.stderr)
862 pass
863 pass
864 pass
865 out.write( "#" * 80 + os.linesep )
866 out.flush()
867 out.write( "#" * 80 + os.linesep )

◆ fileInfos()

python.PoolFile.PoolFile.fileInfos ( self)

Definition at line 734 of file PoolFile.py.

def fileInfos(self):
    """Return a three-line summary: file name, size in kb, event count.

    The name and size come from ``self._fileInfos``; the event count is the
    DataHeader record's ``nEntries``. Lines are joined with ``os.linesep``.
    """
    infos = self._fileInfos
    nameLine = "File:" + infos['name']
    sizeLine = "Size: %12.3f kb" % (infos['size'] / Units.kb)
    eventsLine = "Nbr Events: %i" % self.dataHeader.nEntries
    return os.linesep.join((nameLine, sizeLine, eventsLine))

Member Data Documentation

◆ _fileInfos

dict python.PoolFile.PoolFile._fileInfos = None
protected

Definition at line 499 of file PoolFile.py.

◆ augNames

python.PoolFile.PoolFile.augNames = set()

Definition at line 504 of file PoolFile.py.

◆ data

list python.PoolFile.PoolFile.data = []

Definition at line 506 of file PoolFile.py.

◆ dataHeader

python.PoolFile.PoolFile.dataHeader
Initial value:
= PoolRecord("DataHeader", 0, 0, 0,
nEntries = 0,
dirType = "T")

Definition at line 501 of file PoolFile.py.

◆ dataHeaderA

dict python.PoolFile.PoolFile.dataHeaderA = {}

Definition at line 505 of file PoolFile.py.

◆ keys

python.PoolFile.PoolFile.keys = None

Definition at line 500 of file PoolFile.py.

◆ poolFile

python.PoolFile.PoolFile.poolFile = None

Definition at line 518 of file PoolFile.py.

◆ ROOT

python.PoolFile.PoolFile.ROOT = ROOT

Definition at line 547 of file PoolFile.py.

◆ verbose

python.PoolFile.PoolFile.verbose = verbose

Definition at line 507 of file PoolFile.py.


The documentation for this class was generated from the following file: