Functions
def	main ()

Variables
string	__author__ = "Will Buttinger"

string	__doc__

Function Documentation

◆ main()

def checkPRW.main ( )

Definition at line 16 of file checkPRW.py.

 def main():
     from argparse import RawTextHelpFormatter
     
     parser = argparse.ArgumentParser(description=__doc__,formatter_class=RawTextHelpFormatter)
     parser.add_argument('--outPRWFile',action="store",help="OPTIONAL Name of the output prw file containing valid configs",required=False)
     parser.add_argument('--outputSuspect',action="store_true",help="allow for suspect channels to be included in the output prw file",default=False)
     parser.add_argument('--inDsTxt',action="store",help="text file containing datasets to make PRW for (one per line)",required=True)
     parser.add_argument('prwFiles',nargs="+",help="PRW Config files to scan")
     
     args = parser.parse_args()
  
     
     try:
       import pyAMI.atlas.api as atlasAPI
       import pyAMI.client
     except ImportError:
       print("Could not import pyAMI ... please do: lsetup pyAMI")
       print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
       return 1
  
     client = pyAMI.client.Client(['atlas', 'atlas-replica'])
     atlasAPI.init()
  
     #read datasets into list
     datasets=[]
     for txtFile in args.inDsTxt.split(","):
       with open(txtFile) as f: datasets += f.read().splitlines()
  
     
     print("Determining provenances of %d datasets ..." % len(datasets))
     
     aodDatasets=dict()
     for dataset in datasets:
       #strip the scope if it's there
       if dataset.startswith("#"): continue
       dataset = dataset.rsplit(":")[-1].strip()
       if len(dataset)==0: continue
  
       print("Doing %s" % dataset)
       theParent=""
       if ".DAOD_PHYS." in dataset:
           print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)
           theParent = dataset
           theParentSize = int(atlasAPI.list_datasets(client, theParent,fields='ldn,events')[0][u'events'])
       else:
           prov = atlasAPI.get_dataset_prov(client, dataset )
           if 'node' not in prov:
               print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
               continue
           singleTagName=""
           for ds in prov['node']:
             if ds[u'dataType']!=u'AOD': continue
             dsName = ds[u'logicalDatasetName']
             if 'recon.AOD' not in ds[u'logicalDatasetName']: continue
             etags = re.findall('e[0-9]+_', dsName)
             stags = re.findall('s[0-9]+_', dsName)
             astags = re.findall('a[0-9]+_s[0-9]+', dsName)
             if len(etags) == 2 or len(stags) == 2 or len(astags) == 1:
                 if len(etags) == 2:
                     print("INFO: Found a double e-tag container %s!" % dsName)
                     dsName = dsName.replace(etags[1], "")
                 if len(stags) == 2:
                     print("INFO: Found a double s-tag container %s!" % dsName)
                     dsName = dsName.replace(stags[1], "")
                 if len(astags) == 1 and len(stags) == 1:
                     print("INFO: Found an a+s-tag container %s!" % dsName)
                     dsName = dsName.replace(stags[0], "")
                 singleTagName = dsName
                 continue
             theParent = str(dsName)
             theParentSize = int(ds[u'events'])
             break
  
           if theParent == "":
             if singleTagName == "":
                 print("ERROR: No single-tag name available for %s, skipping!" % dataset)
                 continue
             else:
                 print("INFO: Trying with single-tag containers manually %s!" % singleTagName)
                 try:
                     prov = atlasAPI.get_dataset_prov(client, singleTagName)
                 except pyAMI.exception.Error:
                     print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
                     continue
                 if 'node' in prov:
                     for ds in prov['node']:
                         if ds[u'logicalDatasetName'] == singleTagName:
                             theParent = singleTagName
                             theParentSize = int(ds[u'events'])
                 else:
                     print("ERROR: key 'node' not found for %s, skipping!" % dataset)
                     continue
  
           if theParent=="":
               print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
               continue
  
       #extract the dsid ...
       theParent = theParent.split(".")[1]
       
       if theParent in aodDatasets: aodDatasets[theParent] += theParentSize
       else: aodDatasets[theParent] = theParentSize
     
     #aodDatasets is now a chanNum -> eventNumber pairing ...
     
     import ROOT
     
     out = ROOT.CP.TPileupReweighting("out")
     for f in args.prwFiles:
       out.AddConfigFile(f)
     out.ResetCountingMode() #trick tool into going into counting mode 
     
     #list of known period numbers
     periodNumbers = out.GetPeriodNumbers()
     
     for dsid,nevents in aodDatasets.items():
       #get the sum of weights from the tool
       
       total=0;
       for p in periodNumbers:
         if p==-1: continue
         hist = out.GetInputHistogram(int(dsid),p)
         if hist: total += hist.GetEntries()
       
       if total==nevents:
         print("channel %s is ok" % dsid)
       elif total<nevents:
         print("channel %s is incomplete (missing %d events from config files)" % (dsid,nevents-total))
         out.RemoveChannel(int(dsid))
       elif total>nevents:
         print("channel %s is suspect! (config files have additional %d events)" % (dsid,total-nevents))
         if not args.outputSuspect:
           out.RemoveChannel(int(dsid))
       
       
     
     if args.outPRWFile:
         out.Initialize();
         out.WriteToFile(args.outPRWFile);
     
     
     return 0
  
  

Variable Documentation

◆ __author__

string checkPRW.__author__ = "Will Buttinger"

private

Definition at line 3 of file checkPRW.py.

◆ __doc__

string checkPRW.__doc__

private

Initial value:

 =  """
 This script helps you check your config files are complete. 
 You can also use it to output a single PRW config file containing just your datasets.
  
 Example: checkPRW.py --outPRWFile=my.prw.root --inDsTxt=my.datasets.txt  path/to/prwConfigs/*.root
  
 """

Definition at line 4 of file checkPRW.py.

Functions

Variables

Function Documentation

◆ main()

Variable Documentation

◆ __author__

◆ __doc__

◆ author

◆ doc