ATLAS Offline Software
Loading...
Searching...
No Matches
checkPRW.py
Go to the documentation of this file.
1#!/usr/bin/env python
2# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3
# Module metadata. __doc__ is also passed to argparse as the --help
# description in main(), so its text is user-facing.
4__author__ = "Will Buttinger"
5__doc__ = """
6This script helps you check your config files are complete.
7You can also use it to output a single PRW config file containing just your datasets.
8
9Example: checkPRW.py --outPRWFile=my.prw.root --inDsTxt=my.datasets.txt path/to/prwConfigs/*.root
10
11"""
12import argparse
13import os
14import re
15
# Production-tag patterns, compiled once instead of once per provenance node.
_ETAG_RE = re.compile(r'e[0-9]+_')
_STAG_RE = re.compile(r's[0-9]+_')
_ASTAG_RE = re.compile(r'a[0-9]+_s[0-9]+')


def _single_tag_name(dsName):
    """Return dsName with its extra production tags removed.

    A name counts as multi-tag when it has two e-tags, two s-tags or an
    a+s tag pair. Returns None when the name is not multi-tag.
    """
    etags = _ETAG_RE.findall(dsName)
    stags = _STAG_RE.findall(dsName)
    astags = _ASTAG_RE.findall(dsName)

    if len(etags) != 2 and len(stags) != 2 and len(astags) != 1:
        return None

    if len(etags) == 2:
        print("INFO: Found a double e-tag container %s!" % dsName)
        dsName = dsName.replace(etags[1], "")
    if len(stags) == 2:
        print("INFO: Found a double s-tag container %s!" % dsName)
        dsName = dsName.replace(stags[1], "")
    if len(astags) == 1 and len(stags) == 1:
        print("INFO: Found an a+s-tag container %s!" % dsName)
        dsName = dsName.replace(stags[0], "")
    return dsName


def _find_parent_aod(client, atlasAPI, amiError, dataset):
    """Look up the dataset whose event count should be expected for `dataset`.

    Args:
        client: the pyAMI client used for every query.
        atlasAPI: the imported ``pyAMI.atlas.api`` module.
        amiError: the exception class raised by failed AMI queries.
        dataset: dataset name with any scope prefix already removed.

    Returns:
        A ``(name, events)`` tuple, or None when the lookup failed. A message
        explaining the failure has already been printed in that case.
    """
    failure = "ERROR: Could not determine provenance of %s, skipping!" % dataset

    if ".DAOD_PHYS." in dataset:
        print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)
        try:
            rows = atlasAPI.list_datasets(client, dataset, fields='ldn,events')
        except amiError:
            rows = []
        # An unknown dataset gives no rows; skip it instead of crashing.
        if not rows:
            print(failure)
            return None
        return dataset, int(rows[0]['events'])

    # Guarded like the single-tag retry below, so one bad dataset name does
    # not abort the whole run.
    try:
        prov = atlasAPI.get_dataset_prov(client, dataset)
    except amiError:
        print(failure)
        return None
    if 'node' not in prov:
        print(failure)
        return None

    singleTagName = None
    for ds in prov['node']:
        if ds['dataType'] != 'AOD':
            continue
        dsName = ds['logicalDatasetName']
        if 'recon.AOD' not in dsName:
            continue
        stripped = _single_tag_name(dsName)
        if stripped is not None:
            # Multi-tag container: remember its single-tag form as a fallback
            # and keep looking for a plain single-tag AOD.
            singleTagName = stripped
            continue
        return str(dsName), int(ds['events'])

    if singleTagName is None:
        print("ERROR: No single-tag name available for %s, skipping!" % dataset)
        return None

    print("INFO: Trying with single-tag containers manually %s!" % singleTagName)
    try:
        prov = atlasAPI.get_dataset_prov(client, singleTagName)
    except amiError:
        print(failure)
        return None
    if 'node' not in prov:
        print("ERROR: key 'node' not found for %s, skipping!" % dataset)
        return None

    parent = None
    for ds in prov['node']:
        if ds['logicalDatasetName'] == singleTagName:
            parent = (singleTagName, int(ds['events']))
    if parent is None:
        print(failure)
    return parent


def main():
    """Check that the given PRW config files cover the requested datasets.

    Reads dataset names from the --inDsTxt file(s), asks AMI for the event
    count of each dataset's parent AOD, sums those counts per channel (DSID),
    and compares each sum with the number of entries the PRW config files
    hold for that channel. Incomplete channels are removed from the tool;
    suspect channels (more entries than expected) are removed unless
    --outputSuspect is given. With --outPRWFile the remaining configuration
    is written to that file.

    Returns:
        0 on completion, 1 if pyAMI could not be imported.
    """
    from argparse import RawTextHelpFormatter

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--outPRWFile', action="store", help="OPTIONAL Name of the output prw file containing valid configs", required=False)
    parser.add_argument('--outputSuspect', action="store_true", help="allow for suspect channels to be included in the output prw file", default=False)
    parser.add_argument('--inDsTxt', action="store", help="text file containing datasets to make PRW for (one per line)", required=True)
    parser.add_argument('prwFiles', nargs="+", help="PRW Config files to scan")

    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
        # Imported explicitly: the error class is used below and should not
        # depend on another pyAMI module pulling it in as a side effect.
        import pyAMI.exception
    except ImportError:
        print("Could not import pyAMI ... please do: lsetup pyAMI")
        print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
        return 1

    client = pyAMI.client.Client(['atlas', 'atlas-replica'])
    atlasAPI.init()

    # read datasets into list (--inDsTxt may be a comma-separated list of files)
    datasets = []
    for txtFile in args.inDsTxt.split(","):
        with open(txtFile) as f:
            datasets += f.read().splitlines()

    print("Determining provenances of %d datasets ..." % len(datasets))

    aodDatasets = {}
    for line in datasets:
        # Strip first so that indented comment lines are recognised too.
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # strip the scope if it's there
        dataset = line.rsplit(":")[-1].strip()
        if not dataset:
            continue

        print("Doing %s" % dataset)
        parent = _find_parent_aod(client, atlasAPI, pyAMI.exception.Error, dataset)
        if parent is None:
            continue
        parentName, parentSize = parent

        # extract the dsid ...
        dsid = parentName.split(".")[1]
        aodDatasets[dsid] = aodDatasets.get(dsid, 0) + parentSize

    # aodDatasets is now a chanNum -> eventNumber pairing ...

    import ROOT

    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  # trick tool into going into counting mode

    # list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    for dsid, nevents in aodDatasets.items():
        # get the number of entries the tool holds for this channel
        total = 0
        for p in periodNumbers:
            # NOTE(review): -1 is presumably a catch-all period entry - confirm
            if p == -1:
                continue
            hist = out.GetInputHistogram(int(dsid), p)
            if hist:
                total += hist.GetEntries()

        if total == nevents:
            print("channel %s is ok" % dsid)
        elif total < nevents:
            print("channel %s is incomplete (missing %d events from config files)" % (dsid, nevents - total))
            out.RemoveChannel(int(dsid))
        elif total > nevents:
            print("channel %s is suspect! (config files have additional %d events)" % (dsid, total - nevents))
            if not args.outputSuspect:
                out.RemoveChannel(int(dsid))

    if args.outPRWFile:
        out.Initialize()
        out.WriteToFile(args.outPRWFile)

    return 0
158
159
if __name__ == "__main__":
    import sys

    status = main()
    # os._exit() ends the process without flushing stdio buffers, so flush
    # them here or the last messages are lost when output goes to a pipe or
    # file.
    # NOTE(review): os._exit is presumably used to skip interpreter/ROOT
    # teardown - confirm before replacing it with sys.exit.
    sys.stdout.flush()
    sys.stderr.flush()
    os._exit(status)
void print(char *figname, TCanvas *c1)