ATLAS Offline Software
generatePRW.py
#!/usr/bin/env python
# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration

__author__ = "Will Buttinger"
__doc__ = """
Submit a task to the grid to generate PRW config files for a list of datasets.
This command ensures that the PRW config files are generated from the unskimmed primary AOD datasets.

Example: generatePRW.py --outDS=user.will.myPRW --inDsTxt=datasets.txt

"""
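# The --inDsTxt file lists one dataset per line. Lines starting with '#' are ignored,
# surrounding '/' characters are stripped, and an optional rucio scope prefix (text
# before a ':') is removed. A hypothetical example file (dataset names are for
# illustration only, not real samples):
#   mc20_13TeV:mc20_13TeV.410470.PhPy8EG_ttbar_nonallhad.deriv.DAOD_PHYS.e6337_s3126_r10724_p4855
#   #mc20_13TeV.999999.commented_out_entry.recon.AOD.e1234_s5678_r9999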

import os
import argparse

def main():
    from argparse import RawTextHelpFormatter

    parser = argparse.ArgumentParser(description=__doc__,formatter_class=RawTextHelpFormatter)

    parser.add_argument('--inDsTxt',action="store",help="text file containing datasets to make PRW for (one per line) [REQUIRED]",required=True)
    parser.add_argument('--outDS',action="store",default="",help="Name of the output dataset",required=False)
    parser.add_argument('--forceStaged',action="store_true",help="If set, grid jobs will be submitted with the forceStaged option")
    parser.add_argument('--skipNTUP_PILEUP',action="store_true",help="If set, will not check for existing NTUP_PILEUP datasets")
    parser.add_argument('prwFiles',nargs="*",help="Existing PRW config files to check")

    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
        import pyAMI.exception  # needed for the pyAMI.exception.Error handler used below
    except ImportError:
        print("Could not import pyAMI ... please do: lsetup pyAMI")
        print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
        return 1

    client = pyAMI.client.Client(['atlas', 'atlas-replica'])
    atlasAPI.init()
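    # The AMI client set up above is used below to resolve each dataset's provenance
    # (finding its unskimmed parent AOD and that parent's total event count) and to
    # search for centrally produced NTUP_PILEUP datasets.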

    #read datasets into list
    with open(args.inDsTxt) as f: datasets = f.read().splitlines()

    import ROOT

    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode() #trick tool into going into counting mode
    #list of known period numbers
    periodNumbers = out.GetPeriodNumbers()
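    # With the existing config files loaded in counting mode, GetInputHistogram(dsid,period)
    # below returns the stored distribution for that channel and period; its entry count is
    # compared against the parent AOD event count to judge completeness.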

    # count lines
    count=0
    for dataset in datasets:
        d = dataset.strip("/")
        if d.startswith("#"): continue
        d = d.rsplit(":")[-1].strip()
        if len(d)==0: continue
        count+=1

    print("Determining provenances of %d datasets ..." % count)

    aodDatasets=[]
    ntupDatasets=[]
    for dataset in datasets:
        dataset = dataset.strip("/")
        if dataset.startswith("#"): continue
        #strip the scope if it's there
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset)==0: continue

        print("Doing %s" % dataset)
        theParent=""
        theParentSize=-1 #expected event count; stays -1 (unknown) for DAOD_PHYS inputs
        if ".DAOD_PHYS." in dataset:
            print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)
            theParent = dataset
        else:
            prov = atlasAPI.get_dataset_prov(client, dataset )
            if 'node' not in prov:
                print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
                continue
            for ds in prov['node']:
                if ds[u'dataType']!=u'AOD': continue
                if 'recon.AOD' not in ds[u'logicalDatasetName']: continue
                theParent = str(ds[u'logicalDatasetName'])
                theParentSize = int(ds[u'events'])
                break
        if theParent=="":
            print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
            continue

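        # If existing PRW config files were supplied on the command line, check whether
        # this DSID is already fully covered by them before doing anything else.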
        #check input prw files, if we specified
        isIncomplete=False
        if len(args.prwFiles) and theParentSize>=0: #completeness can only be checked when the expected event count is known
            total=0
            dsid = theParent.split(".")[1] #the DSID is the second dot-separated field of the dataset name
            for p in periodNumbers:
                if p==-1: continue
                hist = out.GetInputHistogram(int(dsid),p)
                if hist: total += hist.GetEntries()

            if total==theParentSize:
                print("INFO: %s is complete in your existing PRW files. Good!" % dataset)
                continue
            if total>theParentSize:
                print("WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (dataset,total,theParentSize))
                continue
            else:
                if total!=0:
                    print("WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (dataset,total,theParentSize))
                    isIncomplete=True

        #before adding the dataset, see if we can find an NTUP_PILEUP for it
        if not args.skipNTUP_PILEUP:
            ntupDatasetName = theParent.replace("DAOD_PHYS","NTUP_PILEUP")
            ntupDatasetName = ntupDatasetName.replace("AOD","NTUP_PILEUP")
            ntupDatasetName = ntupDatasetName.replace("aod","%")
            ntupDatasetName = ntupDatasetName.replace("merge","%")
            ntupDatasetName = ntupDatasetName.replace("recon","%")
            ntupDatasetName = ntupDatasetName.replace("deriv","%")
            #remove everything after first rtag of ami tag .. replace with wildcard
            first_rtag_pos = ntupDatasetName.index("_r",ntupDatasetName.index("NTUP_PILEUP"))
            try:
                next_underscore_pos = ntupDatasetName.index("_",first_rtag_pos+1)
            except ValueError:
                next_underscore_pos = len(ntupDatasetName)
            ntupDatasetName = ntupDatasetName[:next_underscore_pos]+"%"
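            # Illustration of the pattern built above (hypothetical dataset name, shown
            # only to make the string surgery concrete): a parent such as
            #   mc16_13TeV.410470.PhPy8EG_ttbar_nonallhad.recon.AOD.e6337_s3126_r9364
            # becomes the AMI wildcard search pattern
            #   mc16_13TeV.410470.PhPy8EG_ttbar_nonallhad.%.NTUP_PILEUP.e6337_s3126_r9364%
            # i.e. the production step is wildcarded and anything after the first r-tag is replaced by '%'.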
            try:
                res = atlasAPI.list_datasets(client, ntupDatasetName,fields='ldn,prodsys_status')
            except pyAMI.exception.Error:
                print("pyAMI failed ... did you remember to authenticate: voms-proxy-init -voms atlas")
                return 1
            foundNTUP=False
            for r in res:
                if r[u'prodsys_status']!="ALL EVENTS AVAILABLE" and (isIncomplete or r[u'prodsys_status']!="EVENTS PARTIALLY AVAILABLE"): continue
                print("Found existing NTUP_PILEUP ... please download: %s" % r[u"ldn"])
                ntupDatasets += [r[u'ldn']]
                foundNTUP=True
                break
            if foundNTUP: continue

        aodDatasets += [theParent]

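    # Datasets that are not already complete in the supplied PRW files and have no usable
    # central NTUP_PILEUP have been collected in aodDatasets; these need a grid job.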
    if len(aodDatasets)>0:
        if args.outDS=="":
            print("NTUP_PILEUP needs to be generated for the following datasets; please specify the --outDS option to give a name to the output dataset:")
            print(",".join(aodDatasets))
            if len(ntupDatasets)>0: aodDatasets.clear() # carry on to get the downloads script
            else: return 1
        else:
            print("...submitting job to grid...")

            extraOpts=""
            if args.forceStaged: extraOpts += "--forceStaged "

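            # Submit a pathena task that runs PileupReweighting/generatePRW_jobOptions.py
            # over the unskimmed inputs; --addNthFieldOfInDSToLFN=2,6 keeps the DSID and
            # AMI tags from the input dataset name in the output file names.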
            mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (",".join(aodDatasets),args.outDS,extraOpts)

            print("Command: %s" % mycommand)

            from subprocess import call
            if call(mycommand,shell=True)!=0:
                print("Problem executing command. Did you remember to do: lsetup panda")
                return 1

            print("... finished. Please monitor your job on the grid, and when it is finished, download the files!")

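    # If centrally produced NTUP_PILEUP datasets were found above, write a helper shell
    # script containing one 'rucio download' command per dataset.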
    if len(ntupDatasets):
        frucio_fn = 'prw_rucio_downloads_%s.sh' % args.inDsTxt
        print("Please download (and merge) existing config files from these datasets (see also output file %s):" % frucio_fn)
        with open(frucio_fn, 'w') as frucio:
            for ds in ntupDatasets:
                command = ("rucio download %s" % ds)
                print(command)
                frucio.write(command + '\n')
        print("")

    if len(ntupDatasets) or len(aodDatasets):
        print("After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files>" % args.inDsTxt)
        print("Thank you for generating config files, you get a gold star!")
    else:
        print("Looks like you are ready for pileup reweighting!")

    return 0


if __name__ == "__main__":

    os._exit(main())  # exit immediately with main's return code, skipping normal interpreter cleanup