121 """
122 This module can also be run as a standalone executable.
123 For info about the options try:
124 submissionTool.py -h
125
This tool is used to submit samples to the GRID, where one instance of Rivet_i per Matrix-Element
weight is initialised and run. Template Job Options are available in /data/RivetAnalysis_*JO*.py
128 """
# Echo the run configuration between two identical separator lines.
banner = "======================================================================================= = "
print(banner)
print("[INFO] processing files for ", opts.label, " using inputs ", opts.inputSamples)
print("[INFO] ignore systematics ? ", opts.noSyst)
print("[INFO] use ME-wights from downloaded test sample ? ", opts.downloadSample)
print("[INFO] location of downloaded samples = ", opts.testSampleDir)
print(banner)

# Exactly one of the two modes must be selected: Rivet mode means an analysis
# was given and no template job options; AthAnalysis mode is the reverse.
hasTemplate = opts.templateJobOptions is not None
hasAnalysis = opts.analysis is not None
isRivet = hasAnalysis and not hasTemplate
isAthAnalysis = hasTemplate and not hasAnalysis

# The two flags are equal only when both or neither option was supplied.
if isRivet == isAthAnalysis:
    for message in (
        "Configuration error.",
        "If running rivet routines, you should provide arguments for --rivetAnalysis and NOT --templateJobOptions",
        "If running AthAnalysis code, you should provide arguments for --templateJobOptions and NOT --rivetAnalysis",
    ):
        print(message)
    exit(1)

# Positive values for either deprecated option abort the run.
usesDeprecatedOptions = opts.nFilesPerJob > 0 or opts.nJobs > 0
if usesDeprecatedOptions:
    print("[WARNING] --nFilesPerJob and --nJobs options for submissionTool.py are deprecated. Advice it to let pAthena work this our by itself. If you really want to use thise options, specify them manually with --pathenaOptions")
    exit(1)
148
# Submit one pathena job per sample listed in the input file.  Each useful line
# holds a sample name, optionally followed by a single space and a specifier
# saying how much of the sample to process ('all', a percentage such as '10%',
# a fraction such as '0.1', or an integer number of events).
# NOTE(review): this block was reconstructed from text that had lost its
# indentation; places where more than one nesting was plausible are flagged.
with open(opts.inputSamples) as f:  # context manager: the file is always closed
    for line in f:
        line = line.strip()
        fracString = ""
        if len(line.split(" ")) > 1:
            fracString = line.split(" ")[1]
            line = line.split(" ")[0]
        if len(line) == 0: continue
        if line[0] == "#": continue
        if line.strip()[-1] == '/': line = line.strip()[0:-1]

        thisSampleName = line.split()[0]
        isOfficialProduction = True
        if thisSampleName in re.findall("user.*", thisSampleName):
            print("[INFO] this sample has been indentified as a user-geneated sample rather than official ATLAS production")
            isOfficialProduction = False

        # NOTE(review): the line introducing this if/else was missing from the
        # reviewed text (only its branches survived).  The guard used here is a
        # reconstruction -- TODO confirm against the file history.
        if not isOfficialProduction:
            if ":" not in thisSampleName: dsid = thisSampleName
            else: dsid = thisSampleName.split(":")[1]
        else:
            dsid = thisSampleName.split(".")[1]
        print("\n--- [INFO] processing DSID %s ---" % dsid)
        testSamplePath = None

        # Work on a per-sample copy so that a --nFiles restriction computed for
        # one sample is not appended to the command of every later sample.
        samplePathenaOptions = opts.pathenaOptions

        # Parse the numeric part of the specifier up front, so that a
        # non-numeric string reaches the "malformed" message below instead of
        # raising ValueError.
        fracValue = None
        if fracString not in ("", "all", "-1"):
            try:
                fracValue = float(fracString.replace("%", ""))
            except ValueError:
                fracValue = None
            else:
                # 'nan' and 'inf' parse as floats but cannot be turned into a file count
                if fracValue != fracValue or abs(fracValue) == float("inf"):
                    fracValue = None

        frac = 1
        nFilesInSample, nEventsInSample = -1, -1
        malformed = False
        if fracString in ("", "all", "-1"):
            print("[INFO] sample specified with string '%s': processing all events" % fracString)
        elif fracValue is None:
            malformed = True
        elif "%" in fracString or ("." in fracString and fracValue < 1):
            # Percentage of files ('10%') or fraction of files ('0.1').
            nFilesInSample, nEventsInSample = getTotalNEventsAndNFiles(thisSampleName)
            frac = fracValue * 0.01 if "%" in fracString else fracValue
            print("[INFO] sample specified with string '%s': processing %d out of %d files, so %.2f%% of files" % (fracString, int(frac * nFilesInSample), nFilesInSample, 100 * (frac * nFilesInSample) / nFilesInSample))
            samplePathenaOptions += " --nFiles=%d " % int(frac * nFilesInSample)
        elif ("." not in fracString) and int(fracValue) > 1:
            # Integer number of events, converted into a fraction of the files.
            events = int(fracValue)
            nFilesInSample, nEventsInSample = getTotalNEventsAndNFiles(thisSampleName)
            frac = float(events) / nEventsInSample
            print("[INFO] sample specified with string '%s'. processing %d/%d=%.2f%% events, so %d/%d=%.2f%% of files" % (fracString, events, nEventsInSample, 100 * frac, int(frac * nFilesInSample), nFilesInSample, 100 * (frac * nFilesInSample) / nFilesInSample))
            samplePathenaOptions += " --nFiles=%d " % int(frac * nFilesInSample)
        else:
            malformed = True
        if malformed:
            # The offending string is now actually interpolated into the message.
            print("[ERROR] malformed input string: %s. Should either be an integer number of events, a float between (0, 1) for the fraction of files, or a percentage of files, or 'all' (default)" % fracString)
            exit(1)

        os.system("mkdir -p %s" % opts.testSampleDir)
        testSamplePath = findTestFile(opts.testSampleDir, dsid)
        systWeights = []
        if opts.noSyst and isRivet:
            if float(rivet.version()[0]) >= 3:
                opts.templateJobOptions = "%s/data/RivetAnalysis_JO_Rivet3noSyst.py" % (submissionTemplatesDir)
            else:
                opts.templateJobOptions = "%s/data/RivetAnalysis_JO_noSyst.py" % (submissionTemplatesDir)
        if not opts.noSyst:
            print("[INFO] Including the Systematic Variations stored as ME weights")

            list_dictionary, list_keys = rDB.getWeights(dsid)
            if len(list_dictionary.keys()) > 0 and not opts.downloadSample:
                if not isOfficialProduction:
                    print("[ERROR] your sample:", thisSampleName)
                    print("[ERROR]... appears to be a user-generated dataset")
                    print("[ERROR]... and will not be present in the DSID_database")
                    print("[ERROR]... try again with option --downloadSamples")
                    exit(1)
                print("[INFO] Obtaining ME weights from Database")

                if isRivet:
                    if float(rivet.version()[0]) >= 3:
                        opts.templateJobOptions = "%s/data/RivetAnalysis_JO_Rivet3.py" % (submissionTemplatesDir)
                    else:
                        opts.templateJobOptions = "%s/data/RivetAnalysis_JO.py" % (submissionTemplatesDir)
                # NOTE(review): assumed to run for both modes (not only under
                # `if isRivet`) -- TODO confirm.
                # Collect every distinct weight name, keeping first-seen order.
                # Reuses the lookup made above instead of querying the database again.
                systWeights = []
                for weightInfo in list_dictionary.values():
                    if isinstance(weightInfo['weights'], list):
                        weights = weightInfo['weights']
                    else:
                        weights = [weightInfo['weights']]
                    for iw in weights:
                        if iw not in systWeights:
                            systWeights.append(iw)
            else:
                print("[INFO] Obtaining ME weights from download of test file")

                if isRivet:
                    if float(rivet.version()[0]) >= 3:
                        opts.templateJobOptions = "%s/data/RivetAnalysis_JO_Rivet3.py" % (submissionTemplatesDir)
                    else:
                        opts.templateJobOptions = "%s/data/RivetAnalysis_JO_MEfromFile.py" % (submissionTemplatesDir)

                # NOTE(review): the download and the metadata lookup are assumed
                # to belong to this branch only -- TODO confirm.
                if (testSamplePath is None):
                    testSamplePath = downloadTestFile(opts.testSampleDir, thisSampleName)
                print("[SUCCESS] found test file ", testSamplePath)

                if isAthAnalysis:
                    print("[INFO] attempting to retrieve weight names from metadata... this will crash if you are trying to submit EVNT files from R21 or DAOD files from R20...")
                    from PyUtils.MetaReader import read_metadata
                    systWeights = None
                    metadata = read_metadata(testSamplePath, None, 'full')[testSamplePath]
                    if '/Generation/Parameters' in metadata:
                        genpars = metadata['/Generation/Parameters']
                        if 'HepMCWeightNames' in genpars:
                            systWeights = genpars['HepMCWeightNames']
                            print("[SUCCESS] we found the following syst weights!", systWeights.keys())
                        else:
                            print('HepMCWeightName not found in /Generation/Parameters:')
                    else:
                        print('/Generation/Parameters not found in metadata:')

        # Copy the template next to the current directory under a per-sample
        # name, then fill in its !PLACEHOLDER! tokens with sed.
        thisSampleJobOption = opts.templateJobOptions.replace(".py", "_%s_%s.py" % (dsid, opts.label)).split("/")[-1]
        os.system("cp %s %s" % (opts.templateJobOptions, thisSampleJobOption))
        # Single quotes would terminate the quoted sed expression, so swap them.
        systWeights = str(systWeights).replace('\'', '"')
        os.system("sed -i -e 's|!SYSTWEIGHTS!|%s|g' %s" % (systWeights, thisSampleJobOption))
        os.system("sed -i -e 's|!DSID!|%s|g' %s" % (dsid, thisSampleJobOption))
        os.system("sed -i -e 's|!TESTSAMPLE!|%s|g' %s" % (testSamplePath, thisSampleJobOption))
        os.system("sed -i -e 's|!RIVETANALYSIS!|%s|g' %s" % (opts.analysis, thisSampleJobOption))

        # The output dataset name is built from the sample name without its
        # "scope:" prefix and without the EVNT suffixes.
        if ":" in thisSampleName:
            outputBaseName = thisSampleName.split(":")[1]
        else:
            outputBaseName = thisSampleName
        outputBaseName = outputBaseName.replace(".evgen.EVNT", "").replace(".merge.EVNT", "")
        outputSampleName = "user.%s." % (findUserNickname()) + outputBaseName + ".RIVET.%s" % (opts.label)

        # Files shipped with the job: the analysis plugin and reference data
        # per Rivet analysis, plus any user-requested extras.  Always joined
        # into a string, so an empty list is never formatted into the command
        # as a literal "[]".
        analysis_files = []
        if isRivet:
            for an in opts.analysis.split(","):
                analysis_files += ["RivetAnalysis_%s.so" % an]
                analysis_files += ["%s.yoda" % an]
        analysis_files = ",".join(analysis_files)
        if len(opts.extraFiles) > 0:
            if len(analysis_files): analysis_files += "," + opts.extraFiles
            else: analysis_files = opts.extraFiles
        if len(analysis_files): analysis_files = "--extFile=%s" % analysis_files

        rootOrYoda = "root" if not isRivet else 'yoda'
        pathenaCommandLine = r"pathena --extOutFile \*.%s --inDS=%s --outDS=%s %s %s %s" % (rootOrYoda, thisSampleName, outputSampleName, analysis_files, thisSampleJobOption, samplePathenaOptions)
        if opts.dryRun:
            pathenaCommandLine += " --noSubmit"
            # A dry run executes nothing; show the command instead of silently
            # discarding it.
            # NOTE(review): confirm whether a dry run was instead meant to
            # execute pathena with --noSubmit.
            print("[INFO] dry run, command not executed: %s" % pathenaCommandLine)
        else:
            os.system(pathenaCommandLine)
297
298
std::string replace(std::string s, const std::string &s2, const std::string &s3)