# NOTE(review): this file was restored from a damaged extract (stray listing
# line numbers, statements split across lines, lost indentation, and some
# statements missing altogether). Every statement tagged "NOTE(review)" below
# was inferred from the surrounding code and must be confirmed against the
# original before this script is trusted.

__author__ = "Will Buttinger"
__doc__ = """
This script helps you check your config files are complete.
You can also use it to output a single PRW config file containing just your datasets.

Example: checkPRW.py --outPRWFile=my.prw.root --inDsTxt=my.datasets.txt path/to/prwConfigs/*.root
"""

import argparse
import re
import sys
from argparse import RawTextHelpFormatter


def main():
    """Compare the event counts in PRW config files with AMI's AOD event counts.

    Steps, in order:
      1. Parse the command line (dataset list file(s), PRW config files,
         optional output file).
      2. For each listed dataset, ask AMI for the parent AOD container and its
         number of events, and accumulate events per channel, where the channel
         is the second dot-separated field of the parent dataset name.
      3. Load the PRW config files into a ``TPileupReweighting`` tool and, per
         channel, sum the entries of its input histograms over all periods.
      4. Report each channel as ok / incomplete / suspect and, if
         ``--outPRWFile`` was given, write the surviving channels to that file.

    Returns:
        int: 0 on completion, 1 if pyAMI cannot be imported.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--outPRWFile', action="store", help="OPTIONAL Name of the output prw file containing valid configs", required=False)
    parser.add_argument('--outputSuspect', action="store_true", help="allow for suspect channels to be included in the output prw file", default=False)
    parser.add_argument('--inDsTxt', action="store", help="text file containing datasets to make PRW for (one per line)", required=True)
    parser.add_argument('prwFiles', nargs="+", help="PRW Config files to scan")

    args = parser.parse_args()

    # NOTE(review): the try/except wrapper and the early return are inferred
    # from the two "Could not import pyAMI" messages; only the first import
    # line is visible in the extract. pyAMI.client and pyAMI.exception are
    # imported explicitly because both are referenced by name below.
    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
        import pyAMI.exception
    except ImportError:
        print("Could not import pyAMI ... please do: lsetup pyAMI")
        print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
        return 1

    client = pyAMI.client.Client(['atlas', 'atlas-replica'])
    # NOTE(review): not visible in the extract; added because pyAMI's documented
    # usage calls init() on the ATLAS API before issuing queries - confirm.
    atlasAPI.init()

    # --inDsTxt may name several comma-separated text files.
    datasets = []  # NOTE(review): initialiser inferred from the "+=" below
    for txtFile in args.inDsTxt.split(","):
        with open(txtFile) as f:
            datasets += f.read().splitlines()

    print("Determining provenances of %d datasets ..." % len(datasets))

    # channel (string) -> total number of events in the parent AOD datasets
    aodDatasets = {}  # NOTE(review): initialiser inferred from its use below
    for dataset in datasets:
        if dataset.startswith("#"):
            continue
        # Keep only what follows the last ':' (drops a "scope:" prefix).
        dataset = dataset.rsplit(":")[-1].strip()
        if not dataset:
            continue

        print("Doing %s" % dataset)

        # NOTE(review): initialisers inferred; "" is the "no parent found"
        # marker that the later error messages imply.
        theParent = ""
        theParentSize = 0

        if ".DAOD_PHYS." in dataset:
            print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)
            # NOTE(review): the statement that sets theParent in this branch is
            # missing from the extract. Using the dataset itself is inferred
            # from the "unskimmed" message above - confirm.
            matches = atlasAPI.list_datasets(client, dataset, fields='ldn,events')
            # An empty answer falls through to the generic
            # "Could not determine provenance" skip instead of an IndexError.
            if matches:
                theParent = dataset
                theParentSize = int(matches[0][u'events'])
        else:
            # NOTE(review): the "else:" itself is inferred; the DAOD_PHYS branch
            # does not use the provenance lookup.
            prov = atlasAPI.get_dataset_prov(client, dataset)
            if 'node' not in prov:
                print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
                continue  # NOTE(review): inferred from "skipping!"

            singleTagName = ""  # NOTE(review): initialiser inferred
            for ds in prov['node']:
                if ds[u'dataType'] != u'AOD':
                    continue
                dsName = ds[u'logicalDatasetName']
                if 'recon.AOD' not in ds[u'logicalDatasetName']:
                    continue
                etags = re.findall('e[0-9]+_', dsName)
                stags = re.findall('s[0-9]+_', dsName)
                if len(etags) == 2 or len(stags) == 2:
                    # NOTE(review): the two inner conditions are inferred from
                    # the e-tag / s-tag wording of the messages.
                    if len(etags) == 2:
                        print("INFO: Found a double e-tag container %s!" % dsName)
                        dsName = dsName.replace(etags[1], "")
                    if len(stags) == 2:
                        print("INFO: Found a double s-tag container %s!" % dsName)
                        dsName = dsName.replace(stags[1], "")
                    # Remember the de-duplicated name as a fallback and keep
                    # looking for a regular single-tag container.
                    singleTagName = dsName
                    continue  # NOTE(review): inferred
                theParent = str(dsName)
                theParentSize = int(ds[u'events'])
                break  # NOTE(review): inferred - first matching AOD wins

            if theParent == "":  # NOTE(review): condition inferred
                if singleTagName == "":
                    print("ERROR: No single-tag name available for %s, skipping!" % dataset)
                    continue  # NOTE(review): inferred from "skipping!"
                print("INFO: Trying with single-tag containers manually %s!" % singleTagName)
                try:
                    prov = atlasAPI.get_dataset_prov(client, singleTagName)
                except pyAMI.exception.Error:
                    print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
                    continue  # NOTE(review): inferred from "skipping!"
                # NOTE(review): the 'node' test is inferred from the
                # "key 'node' not found" message.
                if 'node' not in prov:
                    print("ERROR: key 'node' not found for %s, skipping!" % dataset)
                    continue
                for ds in prov['node']:
                    if ds[u'logicalDatasetName'] == singleTagName:
                        theParent = singleTagName
                        theParentSize = int(ds[u'events'])

        if theParent == "":  # NOTE(review): condition inferred
            print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
            continue

        # The second dot-separated field of the dataset name is the channel key.
        theParent = theParent.split(".")[1]
        aodDatasets[theParent] = aodDatasets.get(theParent, 0) + theParentSize

    # NOTE(review): the ROOT import is not visible in the extract; it is placed
    # here, after argument parsing, as the point of first use - confirm.
    import ROOT

    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        # NOTE(review): the loop body is missing from the extract;
        # AddConfigFile is inferred as the call that loads each config file.
        out.AddConfigFile(f)
    # NOTE(review): placement after (not inside) the loop is inferred.
    out.ResetCountingMode()

    periodNumbers = out.GetPeriodNumbers()

    for dsid, nevents in aodDatasets.items():
        total = 0  # NOTE(review): initialiser inferred from "total +=" below
        for p in periodNumbers:
            # NOTE(review): a statement is missing here in the extract;
            # presumably it skips the -1 period number - confirm.
            if p == -1:
                continue
            hist = out.GetInputHistogram(int(dsid), p)
            if hist:
                total += hist.GetEntries()

        # NOTE(review): the three comparisons are inferred from the messages
        # (nevents-total "missing", total-nevents "additional").
        if total == nevents:
            print("channel %s is ok" % dsid)
        elif total < nevents:
            print("channel %s is incomplete (missing %d events from config files)" % (dsid, nevents - total))
            out.RemoveChannel(int(dsid))
        else:
            print("channel %s is suspect! (config files have additional %d events)" % (dsid, total - nevents))
            if not args.outputSuspect:
                out.RemoveChannel(int(dsid))

    # --outPRWFile is optional, so only write when it was supplied.
    # NOTE(review): this guard and the Initialize() call are inferred.
    if args.outPRWFile:
        out.Initialize()
        out.WriteToFile(args.outPRWFile)

    return 0


if __name__ == "__main__":
    # NOTE(review): the body of this guard is not visible in the extract.
    sys.exit(main())