# Command-line interface.
# --inDsTxt is the only required option; the positional prwFiles arguments
# are existing PRW config files whose coverage should be checked.
import argparse  # NOTE(review): argparse is used below but its import was not visible in this extract
from argparse import RawTextHelpFormatter

parser = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=RawTextHelpFormatter)
parser.add_argument('--inDsTxt', action="store",
                    help="text file containing datasets to make PRW for (one per line) [REQUIRED]",
                    required=True)
parser.add_argument('--outDS', action="store", default="",
                    help="Name of the output dataset",
                    required=False)
parser.add_argument('--forceStaged', action="store_true",
                    help="If set, grid jobs will be submitted with forceStaged option")
parser.add_argument('--skipNTUP_PILEUP', action="store_true",
                    help="If set, will not check for existing NTUP_PILEUP datasets")
parser.add_argument('prwFiles', nargs="*",
                    help="Existing PRW Config files to check")

args = parser.parse_args()
# pyAMI provides the AMI metadata lookups used below; it is only available
# after "lsetup pyAMI" in an ATLAS environment.
# NOTE(review): the try/except structure was lost in extraction and has been
# reconstructed from the surviving error messages -- confirm against upstream.
try:
    import pyAMI.client
    import pyAMI.atlas.api as atlasAPI
except ImportError:
    print("Could not import pyAMI ... please do: lsetup pyAMI")
    print("Also ensure you have a valid certificate (voms-proxy-init -voms atlas)")
    raise SystemExit(1)

# Query the main 'atlas' catalogue, falling back to the replica.
client = pyAMI.client.Client(['atlas', 'atlas-replica'])
# NOTE(review): the upstream script initialises the API here; the call was not
# visible in this extract -- confirm before relying on this copy.
atlasAPI.init()
# Read the dataset list: one dataset name per line.  Comment ('#') and empty
# lines are filtered out later, when the list is iterated.
with open(args.inDsTxt) as f:
    datasets = f.read().splitlines()
# Set up a TPileupReweighting tool in counting mode to tally how many events
# per channel are already covered by the supplied PRW config files.
import ROOT  # NOTE(review): ROOT is used below but its import was not visible in this extract
out = ROOT.CP.TPileupReweighting("out")
for prwFile in args.prwFiles:
    # NOTE(review): the loop body was lost in extraction; loading each config
    # file is the reconstructed intent -- confirm against upstream.
    out.AddConfigFile(prwFile)
out.ResetCountingMode()

# Period numbers of all loaded configs; used when summing input histograms.
periodNumbers = out.GetPeriodNumbers()
# Count the real (non-comment, non-empty) dataset lines so the progress
# message below is accurate.
count = 0  # NOTE(review): initialisation/increment lines were lost in extraction -- confirm
for dataset in datasets:
    d = dataset.strip("/")
    if d.startswith("#"):
        continue
    # Drop an optional "scope:" prefix from the dataset name.
    d = d.rsplit(":")[-1].strip()
    if len(d) == 0:
        continue
    count += 1

print("Determining provenances of %d datasets ..." % count)
# Main loop.  For every dataset: find its parent AOD via AMI provenance,
# check whether the supplied PRW files already cover it, otherwise look for a
# centrally produced NTUP_PILEUP or queue the parent AOD for PRW generation.
# NOTE(review): several lines of this loop were lost in extraction
# (initialisations, try/except headers, break/continue statements); they have
# been reconstructed to match the surviving fragments -- confirm against the
# upstream PileupReweighting checkPRW script before trusting this copy.
aodDatasets = []   # parent AODs that still need a PRW generation job
ntupDatasets = []  # existing NTUP_PILEUP datasets the user should download
for dataset in datasets:
    dataset = dataset.strip("/")
    # Skip commented-out entries.
    if dataset.startswith("#"):
        continue
    # Drop an optional "scope:" prefix.
    dataset = dataset.rsplit(":")[-1].strip()
    if len(dataset) == 0:
        continue

    print("Doing %s" % dataset)

    if ".DAOD_PHYS." in dataset:
        print("INFO: Assuming %s is unskimmed because it is DAOD_PHYS" % dataset)

    # Walk the AMI provenance graph looking for the parent recon AOD.
    theParent = ""
    theParentSize = 0
    try:
        prov = atlasAPI.get_dataset_prov(client, dataset)
    except pyAMI.exception.Error:
        # NOTE(review): exception handling reconstructed -- confirm upstream.
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        continue
    if 'node' not in prov:
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        continue
    for ds in prov['node']:
        if ds[u'dataType'] != u'AOD':
            continue
        if 'recon.AOD' not in ds[u'logicalDatasetName']:
            continue
        theParent = str(ds[u'logicalDatasetName'])
        theParentSize = int(ds[u'events'])
        break
    if theParent == "":
        print("ERROR: Could not determine provenance of %s, skipping!" % dataset)
        continue

    # If PRW files were given, compare their event count for this channel
    # against the parent AOD's event count.
    isIncomplete = False
    if len(args.prwFiles):
        total = 0
        # The DSID is the second dot-separated field of the AOD name.
        dsid = theParent.split(".")[1]
        for p in periodNumbers:
            hist = out.GetInputHistogram(int(dsid), p)
            if hist:
                total += hist.GetEntries()
        if total == theParentSize:
            print("INFO: %s is complete in your existing PRW files. Good!" % dataset)
            continue
        if total > theParentSize:
            print("WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (dataset, total, theParentSize))
            continue
        isIncomplete = True
        print("WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (dataset, total, theParentSize))

    # Try to find a centrally produced NTUP_PILEUP by turning the parent AOD
    # name into an AMI wildcard ('%') search pattern.
    foundNTUP = False
    if not args.skipNTUP_PILEUP:
        ntupDatasetName = theParent.replace("DAOD_PHYS", "NTUP_PILEUP")
        ntupDatasetName = ntupDatasetName.replace("AOD", "NTUP_PILEUP")
        ntupDatasetName = ntupDatasetName.replace("aod", "%")
        ntupDatasetName = ntupDatasetName.replace("merge", "%")
        ntupDatasetName = ntupDatasetName.replace("recon", "%")
        ntupDatasetName = ntupDatasetName.replace("deriv", "%")
        # Keep the name only up to the underscore after the first r-tag
        # following "NTUP_PILEUP", then wildcard the rest.
        first_rtag_pos = ntupDatasetName.index("_r", ntupDatasetName.index("NTUP_PILEUP"))
        try:
            next_underscore_pos = ntupDatasetName.index("_", first_rtag_pos + 1)
        except ValueError:
            # No further underscore: keep the whole remaining name.
            next_underscore_pos = len(ntupDatasetName)
        ntupDatasetName = ntupDatasetName[:next_underscore_pos] + "%"
        try:
            res = atlasAPI.list_datasets(client, ntupDatasetName, fields='ldn,prodsys_status')
        except pyAMI.exception.Error:
            print("pyAMI failed ... did you remember to authenticate: voms-proxy-init -voms atlas")
            # NOTE(review): abort-on-failure reconstructed -- confirm upstream.
            raise SystemExit(1)
        for r in res:
            # Accept fully available datasets; partially available ones only
            # count when our own PRW files are not already incomplete.
            if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and (isIncomplete or r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"):
                continue
            print("Found existing NTUP_PILEUP ... please download: %s" % r[u"ldn"])
            ntupDatasets += [r[u'ldn']]
            foundNTUP = True
            break
    if foundNTUP == True:
        continue

    # Nothing found: the parent AOD needs a PRW generation job.
    aodDatasets += [theParent]
# Anything left in aodDatasets needs a PRW generation grid job.
# NOTE(review): the if/else structure of this section was lost in extraction
# and has been reconstructed from the surviving fragments -- confirm upstream.
if len(aodDatasets) > 0:
    if args.outDS == "":
        # No output dataset name given: just tell the user what is missing.
        print("NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset: ")
        print(",".join(aodDatasets))
        # NOTE(review): clearing when NTUP_PILEUP were also found appears at
        # original line 151; its exact placement is uncertain -- confirm.
        if len(ntupDatasets) > 0:
            aodDatasets.clear()
    else:
        print("...submitting job to grid...")
        extraOpts = ""
        if args.forceStaged:
            extraOpts += "--forceStaged "
        # One pathena submission runs the PRW-generation job options over all
        # missing AODs; --addNthFieldOfInDSToLFN keeps output names unique.
        mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (",".join(aodDatasets), args.outDS, extraOpts)
        print("Command: %s" % mycommand)
        from subprocess import call
        # NOTE(review): shell=True with an interpolated dataset string -- the
        # inputs come from the user's own file, but beware shell quoting.
        if call(mycommand, shell=True) != 0:
            print("Problem executing command. Did you remember to do: lsetup panda")
            raise SystemExit(1)
        print("... finished. Please monitor your job on the grid, and when it is finished, download the files!")
# Write a helper shell script with one "rucio download" line per existing
# NTUP_PILEUP dataset, and echo the commands for the user.
if len(ntupDatasets):
    frucio_fn = 'prw_rucio_downloads_%s.sh' % args.inDsTxt
    print("Please download (and merge) existing config files from these datasets (see also output file %s):" % frucio_fn)
    with open(frucio_fn, 'w') as frucio:
        for ds in ntupDatasets:
            command = ("rucio download %s" % ds)
            # NOTE(review): echoing the command was inferred from a dropped
            # line (original line 177) -- confirm upstream.
            print(command)
            frucio.write(command + '\n')
# Final summary: remind the user to validate any downloads, or confirm that
# everything needed is already in place.
if len(ntupDatasets) or len(aodDatasets):
    print("After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt)
    print("Thank you for generating config files, you get a gold star!")
else:
    # NOTE(review): the else placement was inferred; original line 185 follows
    # a gap in the extract -- confirm upstream.
    print("Looks like you are ready for pileup reweighting!")