ATLAS Offline Software
Functions | Variables
generatePRW Namespace Reference

Functions

def main ()
 

Variables

string __author__ = "Will Buttinger"
 
string __doc__
 

Function Documentation

◆ main()

def generatePRW.main ( )

Definition at line 16 of file generatePRW.py.

def main():
    """Work out which pileup-reweighting (PRW) inputs are still needed and act on it.

    Reads dataset names from the ``--inDsTxt`` file, resolves each one to its
    parent AOD dataset through pyAMI (``.DAOD_PHYS.`` datasets are used as
    their own parent), optionally compares the event count against the PRW
    config files given on the command line, and then either

    * records an already existing NTUP_PILEUP dataset to download, or
    * queues the parent dataset for a ``pathena`` job (needs ``--outDS``).

    A shell script with the ``rucio download`` commands is written to the
    current directory when existing NTUP_PILEUP datasets were found.

    Returns:
        int: 0 on success; 1 if pyAMI cannot be imported, an AMI dataset
        query fails, ``--outDS`` is missing while jobs need submitting and
        nothing can be downloaded instead, or the ``pathena`` command exits
        with a non-zero status.
    """
    import argparse
    import os
    from argparse import RawTextHelpFormatter

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)

    parser.add_argument('--inDsTxt', action="store", help="text file containing datasets to make PRW for (one per line) [REQUIRED]", required=True)
    parser.add_argument('--outDS', action="store", default="", help="Name of the output dataset", required=False)
    parser.add_argument('--forceStaged', action="store_true", help="If set, grid jobs will be submitted with forceStaged option")
    parser.add_argument('--skipNTUP_PILEUP', action="store_true", help="If set, will not check for existing NTUP_PILEUP datasets")
    parser.add_argument('prwFiles', nargs="*", help="Existing PRW Config files to check")

    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print("Could not import pyAMI ... please do: lsetup pyAMI")
        print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
        return 1

    client = pyAMI.client.Client(['atlas', 'atlas-replica'])
    atlasAPI.init()

    # read datasets into list
    with open(args.inDsTxt) as f:
        datasets = f.read().splitlines()

    import ROOT

    out = ROOT.CP.TPileupReweighting("out")
    for prwFile in args.prwFiles:
        out.AddConfigFile(prwFile)
    out.ResetCountingMode()  # trick tool into going into counting mode
    # list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    # Comment lines and blank lines clean up to "" and are dropped here, so
    # the announced count matches what the loop below actually processes.
    datasetNames = [name for name in map(_clean_dataset_name, datasets) if name]

    print("Determining provenances of %d datasets ..." % len(datasetNames))

    aodDatasets = []
    ntupDatasets = []
    for dataset in datasetNames:
        print("Doing %s" % dataset)
        theParent = ""
        # Reset per dataset: the event count is only known when it comes from
        # the AMI provenance record, and a value left over from the previous
        # dataset must never be compared against this one.
        theParentSize = None
        if ".DAOD_PHYS." in dataset:
            print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)
            theParent = dataset
        else:
            prov = atlasAPI.get_dataset_prov(client, dataset)
            if 'node' not in prov:
                print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
                continue
            for ds in prov['node']:
                if ds[u'dataType'] != u'AOD':
                    continue
                if 'recon.AOD' not in ds[u'logicalDatasetName']:
                    continue
                theParent = str(ds[u'logicalDatasetName'])
                theParentSize = int(ds[u'events'])
                break
        if theParent == "":
            print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
            continue

        # check input prw files, if we specified
        isIncomplete = False
        if args.prwFiles:
            if theParentSize is None:
                # No provenance record was read (DAOD_PHYS shortcut above), so
                # there is no expected count to compare with.
                print("WARNING: Expected number of events for %s is unknown, cannot check completeness against your existing PRW files" % dataset)
            else:
                total = 0
                dsid = theParent.split(".")[1]
                for p in periodNumbers:
                    if p == -1:
                        continue
                    hist = out.GetInputHistogram(int(dsid), p)
                    if hist:
                        total += hist.GetEntries()

                if total == theParentSize:
                    print("INFO: %s is complete in your existing PRW files. Good!" % dataset)
                    continue
                if total > theParentSize:
                    print("WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (dataset, total, theParentSize))
                    continue
                if total != 0:
                    print("WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (dataset, total, theParentSize))
                    isIncomplete = True

        # before adding the dataset, see if we can find an NTUP_PILEUP for it
        if not args.skipNTUP_PILEUP:
            try:
                ntupDatasetName = _ntup_pileup_pattern(theParent)
            except ValueError:
                # No "_r" tag to anchor the wildcard on: skip the lookup for
                # this dataset instead of aborting the whole run.
                print("WARNING: Could not find an r-tag in %s, will not look for an existing NTUP_PILEUP" % theParent)
                ntupDatasetName = None

            if ntupDatasetName is not None:
                try:
                    res = atlasAPI.list_datasets(client, ntupDatasetName, fields='ldn,prodsys_status')
                except pyAMI.exception.Error:
                    print("pyAMI failed ... did you remember to authenticate: voms-proxy-init -voms atlas")
                    return 1
                foundNTUP = False
                for r in res:
                    # A partially available NTUP_PILEUP is only accepted when the
                    # dataset was not flagged as incomplete in the user's PRW files.
                    if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and (isIncomplete or r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"):
                        continue
                    print("Found existing NTUP_PILEUP ... please download: %s" % r[u"ldn"])
                    ntupDatasets.append(r[u'ldn'])
                    foundNTUP = True
                    break
                if foundNTUP:
                    continue

        aodDatasets.append(theParent)

    if aodDatasets:
        if args.outDS == "":
            print("NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset: ")
            print(",".join(aodDatasets))
            if ntupDatasets:
                aodDatasets.clear()  # carry on to get the downloads script
            else:
                return 1
        else:
            print("...submitting job to grid...")

            extraOpts = ""
            if args.forceStaged:
                extraOpts += "--forceStaged "

            mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (",".join(aodDatasets), args.outDS, extraOpts)

            print("Command: %s" % mycommand)

            # NOTE(review): the command is a shell string built from dataset
            # names and --outDS; only run this on input you trust.
            from subprocess import call
            if call(mycommand, shell=True) != 0:
                print("Problem executing command. Did you remember to do: lsetup panda")
                return 1

            print("... finished. Please monitor your job on the grid, and when it is finished, download the files!")

    if ntupDatasets:
        # Use only the file name of --inDsTxt so that an input given with a
        # directory component still yields a writable script name in the
        # current directory.
        frucio_fn = 'prw_rucio_downloads_%s.sh' % os.path.basename(args.inDsTxt)
        print("Please download (and merge) existing config files from these datasets (see also output file %s):" % frucio_fn)
        with open(frucio_fn, 'w') as frucio:
            for ds in ntupDatasets:
                command = ("rucio download %s" % ds)
                print(command)
                frucio.write(command + '\n')
        print("")

    if ntupDatasets or aodDatasets:
        print("After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt)
        print("Thank you for generating config files, you get a gold star!")
    else:
        print("Looks like you are ready for pileup reweighting!")

    return 0


def _clean_dataset_name(line):
    """Return the bare dataset name from one input line, or "" if it has none.

    Surrounding "/" characters are removed first, lines that then start with
    "#" are treated as comments, and anything up to the last ":" (the scope)
    is dropped before surrounding whitespace is stripped.
    """
    name = line.strip("/")
    if name.startswith("#"):
        return ""
    return name.rsplit(":")[-1].strip()


def _ntup_pileup_pattern(parent):
    """Build the AMI wildcard pattern used to search for an NTUP_PILEUP dataset.

    The data type in ``parent`` is replaced by NTUP_PILEUP, the processing-step
    words are replaced by "%" wildcards, and everything after the first "_r"
    tag that follows the data type is replaced by a trailing "%".

    Raises:
        ValueError: if ``parent`` contains no "_r" after the data type.
    """
    pattern = parent.replace("DAOD_PHYS", "NTUP_PILEUP")
    pattern = pattern.replace("AOD", "NTUP_PILEUP")
    for step in ("aod", "merge", "recon", "deriv"):
        pattern = pattern.replace(step, "%")
    # remove everything after first rtag of ami tag .. replace with wildcard
    first_rtag_pos = pattern.index("_r", pattern.index("NTUP_PILEUP"))
    next_underscore_pos = pattern.find("_", first_rtag_pos + 1)
    if next_underscore_pos == -1:
        next_underscore_pos = len(pattern)
    return pattern[:next_underscore_pos] + "%"
188 
189 

Variable Documentation

◆ __author__

string generatePRW.__author__ = "Will Buttinger"
private

Definition at line 3 of file generatePRW.py.

◆ __doc__

string generatePRW.__doc__
private
Initial value:
1 = """
2 Submit a task to the grid to generate PRW config files for a list of datasets.
3 This command ensures that the PRW are generated from the unskimmed primary AOD datasets.
4 
5 Example: generatePRW.py --outDS=user.will.myPRW --inDsTxt=datasets.txt
6 
7 """

Definition at line 4 of file generatePRW.py.

CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
generatePRW.main
def main()
Definition: generatePRW.py:16
python.trfUtils.call
def call(args, bufsize=0, executable=None, stdin=None, preexec_fn=None, close_fds=False, shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0, message="", logger=msg, loglevel=None, timeout=None, retry=2, timefactor=1.5, sleeptime=10)
Definition: trfUtils.py:155
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
Trk::open
@ open
Definition: BinningType.h:40
str
Definition: BTagTrackIpAccessor.cxx:11