ATLAS Offline Software
Loading...
Searching...
No Matches
makeStrongReductionTests.py
Go to the documentation of this file.
1# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
2
3# Script by Kate Pachal, December 2016
4
5import sys
6import os
7import argparse
8import time
9import re
10import subprocess
11import datetime
12import random
13import glob
14import time
15
16from itertools import combinations,combinations_with_replacement,permutations,product
17import operator
18
19# This is essentially code to solve the problem of sorting n distinguishable
20# balls into m indistinguishable boxes. We want all unique combinations of
21# parameters into 3 (or n) categories.
22
23# Solution is recursive following the outline given in
24# http://www.elcamino.edu/faculty/gfry/210/DistributeDifBallsDifBoxes.pdf
25
26# For 19 globally reduced nuisance parameters going into 3 strongly reduced NPs,
27# number of configs is s2(19,3) = 193448101: yikes!
28
29# For sake of sanity, not going to go this route. Instead, look at all ways to
30# choose groups of ~int(n/3) uniquely.
31# Using this route, 16 globally reduced parameters give
32# 1009008 configs. This is more reasonable.
33
34# Since Steven used to group all but leading stat uncertainty together,
35# I will group final 5 together, reducing nPars from 19 to 15.
36# This can be done by changing XXX to XXY for all pars you wish to see
37# treated as 1 unit in config file.
38
39# Usage:
40# --d,dir yourRelease, example JES_2016/Moriond2017
41# --t,tmp templateFile, and a path within that release to the template to use
42# --n,npars nPars, the number of reduced parameters to allow (default 3; occasionally 4)
43# --r,ref reference config name, example JES2016_AllNuisanceParameters.config
44
45# A few things which will rarely be changed, and so
46# are not set by command line:
47templateScript = "scripts/StrongReductions_BatchScript_Template.sh"
48ScriptArchive = "NPBashScripts"
49# These are two-point systematics to relate via Steven's function system
50# (which I still don't understand)
51compsToSplit = ["EtaIntercalibration_Modelling_orig","Flavor_Composition_orig","Flavor_Response_orig","BJES_Response"]
52
53# NOTE: THIS CURRENTLY ONLY WORKS FOR m=3!
54# Time estimates: for 10 into 3, 0.0205 s
55# for 12 into 3, 0.1821 s
56# for 15 into 3, 4.5851 s
57# for 19 into 3, ?? s (XXX combinations)
59
60 myCombinations = []
61
62 count = 0
63 # First, look at unique ways to choose first 1/3.
64 for indices in combinations(range(n), int(n/m)) :
65 # We have one set of 7 and a remaining m-int(n/m) to be split in two.
66 remaining = sorted([l for l in range(n) if l not in indices])
67 for secondSet in combinations((x for x in remaining),int((n-int(n/m))/(m-1))) :
68 secondSet = sorted(secondSet)
69 final = sorted([l for l in remaining if l not in secondSet])
70 newlist = sorted([tuple(indices),tuple(secondSet),tuple(final)])
71 count = count+1
72 myCombinations.append(tuple(newlist))
73
74 print "Removing duplicates."
75 myCombinations = list(set(myCombinations))
76 print "Examined",count,"combinations of which",len(myCombinations),"were unique."
77 return myCombinations
78
79# This assumes your n parameters are sensibly numbered in the file.
80def makeNewConfigFile(templateFile,combinations,index,npars) :
81
82 newConfName = templateFile.replace("template","index{0}".format(index))
83 combs = {0: combinations[0], 1 : combinations[1], 2:combinations[2]}
84 count = 0
85 holdVal = -1
86 with open(templateFile) as input, open(newConfName,"write") as output :
87 for line in input :
88 if "XXX" in line or "XXY" in line :
89 if "XXY" in line and holdVal > 0 :
90 line = line.replace("XXY","{0}".format(holdVal))
91 output.write(line)
92 continue
93 for val,indices in combs.iteritems() :
94 if count in indices :
95 addon = 100+val
96 count = count+1
97 if "XXY" in line :
98 holdVal = addon
99 line = line.replace("XXX","{0}".format(addon))
100 line = line.replace("XXY","{0}".format(addon))
101 if count == npars : break
102 output.write(line)
103
104 return newConfName
105
106class JESGroup :
107 def __init__(self):
108 self.name = ""
109 self.index = 0
110 self.groupID = 0
112
113def replace_right(source, target, replacement, replacements=None):
114 return replacement.join(source.rsplit(target, replacements))
115
116def makeSplitConfigs(config,redo=True) :
117
118 # Groups are only 3 in version I'm prepared for.
119 groups = [100,101,102]
120 JESGroups = {}
121
122 # Keep track of number of variations and give
123 # unique names
124 nCombs = 2**len(compsToSplit)
125 if not redo:
126 return nCombs
127
128 mycombinations = tuple(combinations_with_replacement([0,1], len(compsToSplit)))
129 mypermutations = []
130 for i in mycombinations :
131 mypermutations = mypermutations + list(set(tuple(permutations(i))))
132
133 outfiles = []
134 for index in range(len(mypermutations)) :
135 newname = config.replace(".config","_splitV{0}.config".format(index))
136 newname = newname.replace("generateConfigs","splitsConfigs")
137 outfiles.append(open(newname, 'w'))
138
139 # Read in config file and write various options
140 thisVal = -1
141 continuing = False
142 addingSplit = False
143 splitComp = ""
144 group = None
145 with open(config,"r") as input :
146
147 for line in input :
148
149 # Just write comments.
150 if not line.isspace() and not line.startswith("JES") :
151 for file in outfiles : file.write(line)
152 continue
153
154 # Blank line can signal end of component.
155 if line.isspace() :
156 if continuing and addingSplit :
157 for index, file in enumerate(outfiles) :
158 possible = groups[:]
159 newLine = "JESComponent.{0}.Split: 1\n\n".format(thisVal)
160 file.write(newLine)
161
162 # Now need matching opposite section
163 for bonusline in lines :
164 bonusline = bonusline.replace(".{0}.".format(thisVal),".{0}.".format(110+splitComp))
165 if ".Group" in bonusline :
166 # Two possibilities. First is that this is a standalone item
167 # and group will be one of 100, 101, 102.
168 # Alternately, part of a larger JES group and need to get index from that.
169 if group > 99 :
170 groupIndex = group
171 else :
172 groupIndex = JESGroups[group].groupingAssigned
173 possible.remove(groupIndex)
174 useIndex = mypermutations[index][splitComp]
175 # Need to replace only last instance below
176 bonusline = replace_right(bonusline,"{0}".format(group),"{0}".format(possible[useIndex]),1)
177 file.write(bonusline)
178
179 newLine = "JESComponent.{0}.Split: -1\n".format(110+splitComp)
180 file.write(newLine)
181
182 for file in outfiles : file.write(line)
183 continue
184
185 # What component index is this?
186 items = filter(None,line.split())
187 description = items[0]
188 val = eval(description.split(".")[1])
189 # Start a new component/group
190 if val != thisVal :
191 thisVal = val
192 continuing = False
193 lines = [line]
194 else :
195 continuing = True
196 lines.append(line)
197 if line.startswith("JESComponent") :
198 # Can I get the name?
199 if "Name" in description :
200 # Is it one I want?
201 if any(ext in line for ext in compsToSplit) :
202 print "Found component", items[1]
203 addingSplit = True
204 splitComp = compsToSplit.index(items[1])
205 else :
206 addingSplit = False
207 if ".Group" in description :
208 group = eval(items[1])
209
210 # Groups need to be stored in a look-up format
211 # so I can pick up info from them later.
212 if line.startswith("JESGroup") :
213 continuing = False
214 # Can I get the name?
215 if "Name" in description :
216 newGroup = JESGroup()
217 newGroup.index = thisVal
218 newGroup.name = items[1]
219 JESGroups[thisVal] = newGroup
220 if ".Group" in description :
221 JESGroups[thisVal].groupID = eval(items[1])
222 if ".SubGroup" in description :
223 JESGroups[thisVal].groupingAssigned = eval(items[1])
224
225 for file in outfiles :
226 file.write(line)
227
228 for file in outfiles : file.close()
229
230 return len(mypermutations)
231
232def batchSubmit(command, index, phrase) :
233
234 # Perform setLimitsOneMassPoint on batch
235 batchcommand = command
236
237 # Open batch script as fbatchin
238 fbatchin = open(templateScript, 'r')
239 fbatchindata = fbatchin.read()
240 fbatchin.close()
241
242 # open modified batch script (fbatchout) for writing
243 batchtempname = '{0}/StrongReductions_BatchScript_{1}_index{2}.sh'.format(ScriptArchive,phrase,index)
244 fbatchout = open(batchtempname,'w')
245 fbatchoutdata = fbatchindata.replace("ZZZ",batchcommand) # In batch script replace ZZZ for submit command
246 fbatchout.write(fbatchoutdata)
247 modcommand = 'chmod 744 {0}'.format(batchtempname)
248 subprocess.call(modcommand, shell=True)
249 fbatchout.close()
250 submitcommand = "qsub {0}".format(batchtempname)
251 print submitcommand
252 subprocess.call(submitcommand, shell=True)
253
254def runMatrixCommands(nominalFile,config,outPlotName,outRootName,index,batch) :
255
256 # If root file modification time more recent than config modification time,
257 # then we don't have to do this.
258 fileNew = False
259 if os.path.isfile(outRootName):
260 file_time = os.path.getmtime(outRootName)
261 config_time = os.path.getmtime(config)
262 if config_time < file_time : fileNew = True
263
264 if not fileNew :
265 command = 'jetDefinition="AntiKt4EMTopo" configFiles="{0};{1}" outFile="{2}" outHistFile="{3}" bash runMakeCorrelationMatrixPlots.sh Flexible FineGridFixedEta false'.format(nominalFile,config,outPlotName,outRootName)
266 if batch :
267 batchSubmit(command, index, "Matrices")
268 else :
269 subprocess.call(command,shell=True)
270
271def runComparisonCommands(outFileName,plotsName,file1,file2,file3,file4,string,batch) :
272
273 # If output canvases already exist, then unless
274 # root file modification time more recent than canvas modification time,
275 # we don't have to do this.
276 canvasExists = False
277 earliestCanvas = 1E25
278 canvases = glob.glob(outFileName.replace(".png","*.png"))
279 for canvas in canvases :
280 canvasExists = True
281 if os.path.getmtime(canvas) < earliestCanvas : earliestCanvas = os.path.getmtime(canvas)
282
283 latestRootFile = 0
284 for rootFileName in [file1,file2,file3,file4] :
285 if not os.path.isfile(rootFileName) :
286 print "ERROR! Root file does not exist."
287 return
288 if os.path.getmtime(rootFileName) > latestRootFile : latestRootFile = os.path.getmtime(rootFileName)
289
290 if canvasExists and latestRootFile < earliestCanvas :
291 print "Combination already calculated; skip"
292 return
293
294 command = 'python ../python_scripts/Make4DCorrelationMatrix.py "AntiKt4EMTopo" {0} {1} false {2} {3} {4} {5}'.format(outFileName,plotsName,file1,file2,file3,file4)
295 if batch :
296 batchSubmit(command, string, "CompareSets")
297 else :
298 subprocess.call(command,shell=True)
299
300def main() :
301
302 parser = argparse.ArgumentParser()
303 parser.add_argument("-d","--dir")
304 parser.add_argument("-t","--tmp")
305 parser.add_argument("-n","--npars",default=3)
306 parser.add_argument("-r","--ref")
307 parser.add_argument("-b","--batch",action='store_true')
308 parser.add_argument("-ns","--nosubmit", action='store_true')
309 parser.add_argument("-rc","--redo-configs", action='store_true')
310 parser.add_argument("-ntm","--ntestsmatrices",default=100)
311 parser.add_argument("-ntc","--ntestscomparisons",default=100)
312 parser.add_argument("--matrixStage",action='store_true')
313 parser.add_argument("--comparisonStage",action='store_true')
314 parser.add_argument("--investigateComb",default="",type=str)
315 parser.add_argument("--investigateSplits",default="",type=str)
316 args = parser.parse_args()
317 print args
318
319 shareDir = os.getcwd()+"/../../share/"+args.dir
320 nominalFile = shareDir+"/"+args.ref
321 templateFile = shareDir+"/"+args.tmp
322 newFilesDir = ""
323 if "/" in templateFile :
324 for item in templateFile.split("/")[:-1] :
325 newFilesDir = newFilesDir+"/"+item
326
327 nPars = args.npars
328
329 print "sharedir",shareDir
330 print "nominal",nominalFile
331 print "template",templateFile
332 print "new configs will go in",newFilesDir
333 print "using",nPars,"parameters, not counting eta intercalibration non-closure."
334
335 doPlots = False
336
337 # Now we make the new configuration files.
338 configFileList = []
339 nConfigs = 0
340 if args.redo_configs and not args.investigateSplits :
341 start_time = time.time()
342 fullCombList = computeCombinations(15,3) # 15
343 index = -1
344 print("--- %s seconds ---" % (time.time() - start_time))
345 for comb in fullCombList :
346 index = index+1
347 thisname = makeNewConfigFile(templateFile,comb,index,args.npars)
348 #configFileList.append(thisname)
349 nConfigs = index+1
350 else :
351 nConfigs = len([os.path.join(newFilesDir, f) for f in os.listdir(newFilesDir) if "index" in f])
352
353 # Begin matrix computation stage, if desired.
354 outRootDir = os.getcwd()+"/NPRootFiles/"
355 outRootFormat = outRootDir+"matrices_{0}.root"
356 if args.matrixStage and not args.investigateSplits :
357
358 # Rather than run on all consecutive configs up to ntestsmatrices,
359 # we want configs that are not overly similar to each other.
360 # So randomly select (non-repeating) indices between 0 and the number
361 # of configs available, and run on these until the limit is reached.
362 useval = int(args.ntestsmatrices)
363 indicesToUse = random.sample(xrange(0,nConfigs), useval)
364 for index in indicesToUse :
365
366 # We know what the config matching this index should be.
367 config = templateFile.replace("template","index{0}".format(index))
368
369 # We have 1 config for every set of NP's we wish to examine. Now
370 # make a correlation matrix for each. This is a batch job!
371 outRootName = outRootFormat.format(index)
372 if doPlots :
373 outPlotName = os.getcwd()+"/NPPlots/matrices_{0}.pdf".format(index)
374 else :
375 outPlotName = "NONE"
376
377 runMatrixCommands(nominalFile,config,outPlotName,outRootName,index,args.batch)
378
379 # Begin matrix comparison stage, if desired.
380 if args.comparisonStage and not args.investigateSplits :
381
382 # Now we need to compare sets of 4 and look for one with a low enough difference.
383 # Have to be chosen from among existing root files *which may or may not be
384 # the same as what was just run* since they get replaced only if needed.
385 rootFiles = [os.path.join(outRootDir, f) for f in os.listdir(outRootDir) if "matrices" in f]
386 indicesAvailable = []
387 for file in rootFiles :
388 index = re.findall(r'\d+', file.split("/")[-1].split("_")[-1])
389 indicesAvailable.append(eval(index[0]))
390
391 # In special cases you may have a specific combination you know is good and you want to explore
392 # variations rather close to it to see if it can be improved upon.
393 # Call with the combination and look at others which differ by only 1 config:
394 if args.investigateComb :
395 goodComb = tuple([eval(item) for item in args.investigateComb.split(",")])
396 newList = []
397 for index in range(4) :
398 for new in random.sample(indicesAvailable,max(1,int(float(args.ntestscomparisons)/4.0))) :
399 if not new in goodComb :
400 newComb = list(goodComb)
401 newComb[index] = new
402 newList.append(tuple(newComb))
403 pool = tuple(newList)
404
405 # Default option has no restrictions.
406 else :
407 # There are a crazy number of combinations so just pick a set of those at random
408 # if there are more than 15 of them...
409 random.shuffle(indicesAvailable)
410 if len(indicesAvailable) > 15 :
411 indicesAvailable = indicesAvailable[:15]
412
413 # Now run on unique combinations up to a cutoff number of tries.
414 # For now this is the same as the number of individual files I created,
415 # with the assumption that my patience for this step is about equal ;)
416 pool = tuple(combinations(indicesAvailable,4))
417 #pool = tuple([tuple([116061,36885,33399,64587])])
418
419 for indices in random.sample(pool,min(len(pool),args.ntestscomparisons)) :
420 print "Testing combination",indices
421
422 outFileName = os.getcwd()+"/NPPlots/SR-4D-{0}-{1}-{2}-{3}.png".format(indices[0],indices[1],indices[2],indices[3])
423
424 runComparisonCommands(outFileName,"NONE",outRootFormat.format(indices[0]),outRootFormat.format(indices[1],outRootFormat.format(indices[2])),outRootFormat.format(indices[2]),outRootFormat.format(indices[3]),"{0}-{1}-{2}-{3}".format(indices[0],indices[1],indices[2],indices[3]),args.batch)
425
426 # Let's say we have a good combination but now we need
427 # to check out the various options for splitting configs.
428 if args.investigateSplits :
429
430 configIndices = tuple([eval(item) for item in args.investigateSplits.split(",")])
431 print configIndices
432 splitconfigs = [templateFile.replace("template","index{0}".format(index)) for index in configIndices[1:]]
433 print splitconfigs
434 iterables = []
435 for configindex, config in zip(configIndices[1:],splitconfigs) :
436 newIndices = makeSplitConfigs(config,args.redo_configs)
437 newIterables = []
438
439 for index in range(newIndices) :
440 outRootName = (outRootFormat.format(configindex)).replace(".root","_splitV{0}.root".format(index))
441 newconfig = config.replace(".config","_splitV{0}.config".format(index))
442 newconfig = newconfig.replace("generateConfigs","splitsConfigs")
443 newIterables.append("{0}_splitV{1}".format(configindex,index))
444
445 runMatrixCommands(nominalFile,newconfig,"NONE",outRootName,"{0}-{1}".format(configindex,index),args.batch)
446
447 iterables.append(newIterables)
448
449 # Wait for matrices to finish
450 isdone = not subprocess.check_output("qstat",shell=True)
451 while not isdone :
452 time.sleep(20)
453 isdone = not subprocess.check_output("qstat",shell=True)
454
455 #Run comparisons
456 pool = tuple(product([configIndices[0]],iterables[0],iterables[1],iterables[2]))
457
458 for indices in random.sample(pool,min(len(pool),args.ntestscomparisons)) :
459 print "Testing combination",indices
460
461 outFileName = os.getcwd()+"/NPPlots/SR-4D-{0}-{1}-{2}-{3}.png".format(indices[0],indices[1],indices[2],indices[3])
462
463 runComparisonCommands(outFileName,"NONE",outRootFormat.format(indices[0]),outRootFormat.format(indices[1],outRootFormat.format(indices[2])),outRootFormat.format(indices[2]),outRootFormat.format(indices[3]),"{0}-{1}-{2}-{3}".format(indices[0],indices[1],indices[2],indices[3]),args.batch)
464
465if __name__ == "__main__":
466 main()
void print(char *figname, TCanvas *c1)
#define min(a, b)
Definition cfImp.cxx:40
#define max(a, b)
Definition cfImp.cxx:41
STL class.
void xrange(TH1 *h, bool symmetric)
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:310
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
replace_right(source, target, replacement, replacements=None)
runComparisonCommands(outFileName, plotsName, file1, file2, file3, file4, string, batch)
batchSubmit(command, index, phrase)
runMatrixCommands(nominalFile, config, outPlotName, outRootName, index, batch)
makeNewConfigFile(templateFile, combinations, index, npars)