ATLAS Offline Software
checkPRW.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 __author__ = "Will Buttinger"
4 __doc__ = """
5 This script helps you check your config files are complete.
6 You can also use it to output a single PRW config file containing just your datasets.
7 
8 Example: checkPRW.py --outPRWFile=my.prw.root --inDsTxt=my.datasets.txt path/to/prwConfigs/*.root
9 
10 """
11 import argparse
12 import os
13 import pyAMI
14 import re
15 
def _dataset_name_from_line(line):
    """Return the dataset name held on one line of the input list, or "".

    Surrounding whitespace is removed first, so indented comment lines
    (starting with '#') and whitespace-only lines are skipped rather than
    being passed to AMI. A leading rucio-style "scope:" prefix is dropped.
    """
    entry = line.strip()
    if not entry or entry.startswith("#"):
        return ""
    return entry.rsplit(":")[-1].strip()


def _find_parent_aod(client, dataset):
    """Look up the parent 'recon.AOD' dataset of *dataset* in AMI.

    Returns a ``(logical dataset name, number of events)`` tuple, or None
    (after printing an ERROR line) when the parent cannot be determined.
    AMI errors are reported and turned into None so that one bad dataset
    name does not abort the scan of the remaining ones.
    """
    # Only called once main() has verified that pyAMI is importable.
    import pyAMI.atlas.api as atlasAPI
    import pyAMI.exception

    if ".DAOD_PHYS." in dataset:
        print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)
        try:
            info = atlasAPI.list_datasets(client, dataset, fields='ldn,events')
        except pyAMI.exception.Error:
            info = []
        if not info:
            # Nothing came back for this name: avoid indexing an empty result.
            print("ERROR: Could not find %s in AMI, skipping!" % dataset)
            return None
        return dataset, int(info[0][u'events'])

    try:
        prov = atlasAPI.get_dataset_prov(client, dataset)
    except pyAMI.exception.Error:
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        return None
    if 'node' not in prov:
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        return None

    singleTagName = ""
    for ds in prov['node']:
        if ds[u'dataType'] != u'AOD':
            continue
        dsName = ds[u'logicalDatasetName']
        if 'recon.AOD' not in dsName:
            continue
        etags = re.findall(r'e[0-9]+_', dsName)
        stags = re.findall(r's[0-9]+_', dsName)
        if len(etags) == 2 or len(stags) == 2:
            # Double-tag container: remember the name with the second tag
            # removed and keep looking for a directly usable AOD.
            if len(etags) == 2:
                print("INFO: Found a double e-tag container %s!" % dsName)
                dsName = dsName.replace(etags[1], "")
            if len(stags) == 2:
                print("INFO: Found a double s-tag container %s!" % dsName)
                dsName = dsName.replace(stags[1], "")
            singleTagName = dsName
            continue
        return str(dsName), int(ds[u'events'])

    # No AOD found directly: retry with the reconstructed single-tag name.
    if singleTagName == "":
        print("ERROR: No single-tag name available for %s, skipping!" % dataset)
        return None
    print("INFO: Trying with single-tag containers manually %s!" % singleTagName)
    try:
        prov = atlasAPI.get_dataset_prov(client, singleTagName)
    except pyAMI.exception.Error:
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        return None
    if 'node' not in prov:
        print("ERROR: key 'node' not found for %s, skipping!" % dataset)
        return None

    parentSize = None
    for ds in prov['node']:
        if ds[u'logicalDatasetName'] == singleTagName:
            parentSize = int(ds[u'events'])
    if parentSize is None:
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        return None
    return singleTagName, parentSize


def main():
    """Check PRW config files against the parent AODs of the listed datasets.

    Reads dataset names from the file(s) given by --inDsTxt (comma
    separated), asks AMI for the parent AOD of each one and sums the AOD
    event counts per channel number. Each channel's total is then compared
    with the number of entries of the input histograms that the
    TPileupReweighting tool holds for it after loading the given PRW config
    files. Incomplete channels (and suspect ones unless --outputSuspect is
    given) are removed from the tool; if --outPRWFile is given the remaining
    configuration is written to that file.

    Returns 0 on completion, 1 if pyAMI cannot be imported.
    """
    from argparse import RawTextHelpFormatter

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--outPRWFile', action="store", help="OPTIONAL Name of the output prw file containing valid configs", required=False)
    parser.add_argument('--outputSuspect', action="store_true", help="allow for suspect channels to be included in the output prw file", default=False)
    parser.add_argument('--inDsTxt', action="store", help="text file containing datasets to make PRW for (one per line)", required=True)
    parser.add_argument('prwFiles', nargs="+", help="PRW Config files to scan")

    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
        import pyAMI.exception  # noqa: F401 -- imported explicitly because the AMI error type is caught during the lookups
    except ImportError:
        print("Could not import pyAMI ... please do: lsetup pyAMI")
        print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
        return 1

    client = pyAMI.client.Client(['atlas', 'atlas-replica'])
    atlasAPI.init()

    # read datasets into list
    datasets = []
    for txtFile in args.inDsTxt.split(","):
        with open(txtFile) as f:
            datasets += f.read().splitlines()

    print("Determining provenances of %d datasets ..." % len(datasets))

    aodDatasets = dict()
    # Parent AODs already added: the same AOD can be reached from several
    # input lines (duplicates, or different derivations of one AOD) and its
    # events must only be counted once.
    countedParents = set()
    for line in datasets:
        dataset = _dataset_name_from_line(line)
        if not dataset:
            continue

        print("Doing %s" % dataset)
        parent = _find_parent_aod(client, dataset)
        if parent is None:
            continue
        theParent, theParentSize = parent

        if theParent in countedParents:
            print("INFO: %s was already counted, not adding its events again" % theParent)
            continue
        countedParents.add(theParent)

        # extract the dsid ...
        dsid = theParent.split(".")[1]
        aodDatasets[dsid] = aodDatasets.get(dsid, 0) + theParentSize

    # aodDatasets is now a chanNum -> eventNumber pairing ...

    import ROOT

    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  # trick tool into going into counting mode

    # list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    for dsid, nevents in aodDatasets.items():
        # sum the entries of the tool's input histograms over all periods
        total = 0
        for p in periodNumbers:
            if p == -1:
                continue
            hist = out.GetInputHistogram(int(dsid), p)
            if hist:
                total += hist.GetEntries()

        if total == nevents:
            print("channel %s is ok" % dsid)
        elif total < nevents:
            print("channel %s is incomplete (missing %d events from config files)" % (dsid, nevents - total))
            out.RemoveChannel(int(dsid))
        else:
            print("channel %s is suspect! (config files have additional %d events)" % (dsid, total - nevents))
            if not args.outputSuspect:
                out.RemoveChannel(int(dsid))

    if args.outPRWFile:
        out.Initialize()
        out.WriteToFile(args.outPRWFile)

    return 0
154 
155 
if __name__ == "__main__":
    import sys

    exit_code = main()
    # os._exit() ends the process immediately without flushing stdio
    # buffers, so flush explicitly or the report printed by main() can be
    # lost when stdout is redirected to a file or a pipe.
    # NOTE(review): os._exit is kept on the assumption that it is a
    # deliberate way to skip interpreter teardown -- confirm.
    sys.stdout.flush()
    sys.stderr.flush()
    os._exit(exit_code)
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
checkPRW.main
def main()
Definition: checkPRW.py:16
Trk::open
@ open
Definition: BinningType.h:40
Muon::print
std::string print(const MuPatSegment &)
Definition: MuonTrackSteering.cxx:28
str
Definition: BTagTrackIpAccessor.cxx:11