ATLAS Offline Software
makeStrongReductionTests.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
2 
3 # Script by Kate Pachal, December 2016
4 
5 import sys
6 import os
7 import argparse
8 import time
9 import re
10 import subprocess
11 import datetime
12 import random
13 import glob
14 import time
15 
16 from itertools import combinations,combinations_with_replacement,permutations,product
17 import operator
18 
19 # This is essentially code to solve the problem of sorting n distinguishable
20 # balls into m indistinguishable boxes. We want all unique combinations of
21 # parameters into 3 (or n) categories.
22 
23 # Solution is recursive following the outline given in
24 # http://www.elcamino.edu/faculty/gfry/210/DistributeDifBallsDifBoxes.pdf
25 
26 # For 19 globally reduced nuisance parameters going into 3 strongly reduced NPs,
27 # number of configs is s2(19,3) = 193448101: yikes!
28 
29 # For sake of sanity, not going to go this route. Instead, look at all ways to
30 # choose groups of ~int(n/3) uniquely.
31 # Using this route, 16 globally reduced parameters give
32 # 1009008 configs. This is more reasonable.
33 
34 # Since Steven used to group all but leading stat uncertainty together,
35 # I will group final 5 together, reducing nPars from 19 to 15.
36 # This can be done by changing XXX to XXY for all pars you wish to see
37 # treated as 1 unit in config file.
38 
39 # Usage:
40 # --d,dir yourRelease, example JES_2016/Moriond2017
41 # --t,tmp templateFile, and a path within that release to the template to use
42 # --n,npars nPars, the number of reduced parameters to allow (default 3; occasionally 4)
43 # --r,ref reference config name, example JES2016_AllNuisanceParameters.config
44 
45 # A few things which will rarely be changed, and so
46 # are not set by command line:
47 templateScript = "scripts/StrongReductions_BatchScript_Template.sh"
48 ScriptArchive = "NPBashScripts"
49 # These are two-point systematics to relate via Steven's function system
50 # (which I still don't understand)
51 compsToSplit = ["EtaIntercalibration_Modelling_orig","Flavor_Composition_orig","Flavor_Response_orig","BJES_Response"]
52 
53 # NOTE: THIS CURRENTLY ONLY WORKS FOR m=3!
54 # Time estimates: for 10 into 3, 0.0205 s
55 # for 12 into 3, 0.1821 s
56 # for 15 into 3, 4.5851 s
57 # for 19 into 3, ?? s (XXX combinations)
59 
60  myCombinations = []
61 
62  count = 0
63  # First, look at unique ways to choose first 1/3.
64  for indices in combinations(range(n), int(n/m)) :
65  # We have one set of 7 and a remaining m-int(n/m) to be split in two.
66  remaining = sorted([l for l in range(n) if l not in indices])
67  for secondSet in combinations((x for x in remaining),int((n-int(n/m))/(m-1))) :
68  secondSet = sorted(secondSet)
69  final = sorted([l for l in remaining if l not in secondSet])
70  newlist = sorted([tuple(indices),tuple(secondSet),tuple(final)])
71  count = count+1
72  myCombinations.append(tuple(newlist))
73 
74  print "Removing duplicates."
75  myCombinations = list(set(myCombinations))
76  print "Examined",count,"combinations of which",len(myCombinations),"were unique."
77  return myCombinations
78 
79 # This assumes your n parameters are sensibly numbered in the file.
80 def makeNewConfigFile(templateFile,combinations,index,npars) :
81 
82  newConfName = templateFile.replace("template","index{0}".format(index))
83  combs = {0: combinations[0], 1 : combinations[1], 2:combinations[2]}
84  count = 0
85  holdVal = -1
86  with open(templateFile) as input, open(newConfName,"write") as output :
87  for line in input :
88  if "XXX" in line or "XXY" in line :
89  if "XXY" in line and holdVal > 0 :
90  line = line.replace("XXY","{0}".format(holdVal))
91  output.write(line)
92  continue
93  for val,indices in combs.iteritems() :
94  if count in indices :
95  addon = 100+val
96  count = count+1
97  if "XXY" in line :
98  holdVal = addon
99  line = line.replace("XXX","{0}".format(addon))
100  line = line.replace("XXY","{0}".format(addon))
101  if count == npars : break
102  output.write(line)
103 
104  return newConfName
105 
106 class JESGroup :
107  def __init__(self):
108  self.name = ""
109  self.index = 0
110  self.groupID = 0
112 
113 def replace_right(source, target, replacement, replacements=None):
114  return replacement.join(source.rsplit(target, replacements))
115 
116 def makeSplitConfigs(config,redo=True) :
117 
118  # Groups are only 3 in version I'm prepared for.
119  groups = [100,101,102]
120  JESGroups = {}
121 
122  # Keep track of number of variations and give
123  # unique names
124  nCombs = 2**len(compsToSplit)
125  if not redo:
126  return nCombs
127 
128  mycombinations = tuple(combinations_with_replacement([0,1], len(compsToSplit)))
129  mypermutations = []
130  for i in mycombinations :
131  mypermutations = mypermutations + list(set(tuple(permutations(i))))
132 
133  outfiles = []
134  for index in range(len(mypermutations)) :
135  newname = config.replace(".config","_splitV{0}.config".format(index))
136  newname = newname.replace("generateConfigs","splitsConfigs")
137  outfiles.append(open(newname, 'w'))
138 
139  # Read in config file and write various options
140  thisVal = -1
141  continuing = False
142  addingSplit = False
143  splitComp = ""
144  group = None
145  with open(config,"r") as input :
146 
147  for line in input :
148 
149  # Just write comments.
150  if not line.isspace() and not line.startswith("JES") :
151  for file in outfiles : file.write(line)
152  continue
153 
154  # Blank line can signal end of component.
155  if line.isspace() :
156  if continuing and addingSplit :
157  for index, file in enumerate(outfiles) :
158  possible = groups[:]
159  newLine = "JESComponent.{0}.Split: 1\n\n".format(thisVal)
160  file.write(newLine)
161 
162  # Now need matching opposite section
163  for bonusline in lines :
164  bonusline = bonusline.replace(".{0}.".format(thisVal),".{0}.".format(110+splitComp))
165  if ".Group" in bonusline :
166  # Two possibilities. First is that this is a standalone item
167  # and group will be one of 100, 101, 102.
168  # Alternately, part of a larger JES group and need to get index from that.
169  if group > 99 :
170  groupIndex = group
171  else :
172  groupIndex = JESGroups[group].groupingAssigned
173  possible.remove(groupIndex)
174  useIndex = mypermutations[index][splitComp]
175  # Need to replace only last instance below
176  bonusline = replace_right(bonusline,"{0}".format(group),"{0}".format(possible[useIndex]),1)
177  file.write(bonusline)
178 
179  newLine = "JESComponent.{0}.Split: -1\n".format(110+splitComp)
180  file.write(newLine)
181 
182  for file in outfiles : file.write(line)
183  continue
184 
185  # What component index is this?
186  items = filter(None,line.split())
187  description = items[0]
188  val = eval(description.split(".")[1])
189  # Start a new component/group
190  if val != thisVal :
191  thisVal = val
192  continuing = False
193  lines = [line]
194  else :
195  continuing = True
196  lines.append(line)
197  if line.startswith("JESComponent") :
198  # Can I get the name?
199  if "Name" in description :
200  # Is it one I want?
201  if any(ext in line for ext in compsToSplit) :
202  print "Found component", items[1]
203  addingSplit = True
204  splitComp = compsToSplit.index(items[1])
205  else :
206  addingSplit = False
207  if ".Group" in description :
208  group = eval(items[1])
209 
210  # Groups need to be stored in a look-up format
211  # so I can pick up info from them later.
212  if line.startswith("JESGroup") :
213  continuing = False
214  # Can I get the name?
215  if "Name" in description :
216  newGroup = JESGroup()
217  newGroup.index = thisVal
218  newGroup.name = items[1]
219  JESGroups[thisVal] = newGroup
220  if ".Group" in description :
221  JESGroups[thisVal].groupID = eval(items[1])
222  if ".SubGroup" in description :
223  JESGroups[thisVal].groupingAssigned = eval(items[1])
224 
225  for file in outfiles :
226  file.write(line)
227 
228  for file in outfiles : file.close()
229 
230  return len(mypermutations)
231 
232 def batchSubmit(command, index, phrase) :
233 
234  # Perform setLimitsOneMassPoint on batch
235  batchcommand = command
236 
237  # Open batch script as fbatchin
238  fbatchin = open(templateScript, 'r')
239  fbatchindata = fbatchin.read()
240  fbatchin.close()
241 
242  # open modified batch script (fbatchout) for writing
243  batchtempname = '{0}/StrongReductions_BatchScript_{1}_index{2}.sh'.format(ScriptArchive,phrase,index)
244  fbatchout = open(batchtempname,'w')
245  fbatchoutdata = fbatchindata.replace("ZZZ",batchcommand) # In batch script replace ZZZ for submit command
246  fbatchout.write(fbatchoutdata)
247  modcommand = 'chmod 744 {0}'.format(batchtempname)
248  subprocess.call(modcommand, shell=True)
249  fbatchout.close()
250  submitcommand = "qsub {0}".format(batchtempname)
251  print submitcommand
252  subprocess.call(submitcommand, shell=True)
253 
254 def runMatrixCommands(nominalFile,config,outPlotName,outRootName,index,batch) :
255 
256  # If root file modification time more recent than config modification time,
257  # then we don't have to do this.
258  fileNew = False
259  if os.path.isfile(outRootName):
260  file_time = os.path.getmtime(outRootName)
261  config_time = os.path.getmtime(config)
262  if config_time < file_time : fileNew = True
263 
264  if not fileNew :
265  command = 'jetDefinition="AntiKt4EMTopo" configFiles="{0};{1}" outFile="{2}" outHistFile="{3}" bash runMakeCorrelationMatrixPlots.sh Flexible FineGridFixedEta false'.format(nominalFile,config,outPlotName,outRootName)
266  if batch :
267  batchSubmit(command, index, "Matrices")
268  else :
269  subprocess.call(command,shell=True)
270 
271 def runComparisonCommands(outFileName,plotsName,file1,file2,file3,file4,string,batch) :
272 
273  # If output canvases already exist, then unless
274  # root file modification time more recent than canvas modification time,
275  # we don't have to do this.
276  canvasExists = False
277  earliestCanvas = 1E25
278  canvases = glob.glob(outFileName.replace(".png","*.png"))
279  for canvas in canvases :
280  canvasExists = True
281  if os.path.getmtime(canvas) < earliestCanvas : earliestCanvas = os.path.getmtime(canvas)
282 
283  latestRootFile = 0
284  for rootFileName in [file1,file2,file3,file4] :
285  if not os.path.isfile(rootFileName) :
286  print "ERROR! Root file does not exist."
287  return
288  if os.path.getmtime(rootFileName) > latestRootFile : latestRootFile = os.path.getmtime(rootFileName)
289 
290  if canvasExists and latestRootFile < earliestCanvas :
291  print "Combination already calculated; skip"
292  return
293 
294  command = 'python ../python_scripts/Make4DCorrelationMatrix.py "AntiKt4EMTopo" {0} {1} false {2} {3} {4} {5}'.format(outFileName,plotsName,file1,file2,file3,file4)
295  if batch :
296  batchSubmit(command, string, "CompareSets")
297  else :
298  subprocess.call(command,shell=True)
299 
300 def main() :
301 
302  parser = argparse.ArgumentParser()
303  parser.add_argument("-d","--dir")
304  parser.add_argument("-t","--tmp")
305  parser.add_argument("-n","--npars",default=3)
306  parser.add_argument("-r","--ref")
307  parser.add_argument("-b","--batch",action='store_true')
308  parser.add_argument("-ns","--nosubmit", action='store_true')
309  parser.add_argument("-rc","--redo-configs", action='store_true')
310  parser.add_argument("-ntm","--ntestsmatrices",default=100)
311  parser.add_argument("-ntc","--ntestscomparisons",default=100)
312  parser.add_argument("--matrixStage",action='store_true')
313  parser.add_argument("--comparisonStage",action='store_true')
314  parser.add_argument("--investigateComb",default="",type=str)
315  parser.add_argument("--investigateSplits",default="",type=str)
316  args = parser.parse_args()
317  print args
318 
319  shareDir = os.getcwd()+"/../../share/"+args.dir
320  nominalFile = shareDir+"/"+args.ref
321  templateFile = shareDir+"/"+args.tmp
322  newFilesDir = ""
323  if "/" in templateFile :
324  for item in templateFile.split("/")[:-1] :
325  newFilesDir = newFilesDir+"/"+item
326 
327  nPars = args.npars
328 
329  print "sharedir",shareDir
330  print "nominal",nominalFile
331  print "template",templateFile
332  print "new configs will go in",newFilesDir
333  print "using",nPars,"parameters, not counting eta intercalibration non-closure."
334 
335  doPlots = False
336 
337  # Now we make the new configuration files.
338  configFileList = []
339  nConfigs = 0
340  if args.redo_configs and not args.investigateSplits :
341  start_time = time.time()
342  fullCombList = computeCombinations(15,3) # 15
343  index = -1
344  print("--- %s seconds ---" % (time.time() - start_time))
345  for comb in fullCombList :
346  index = index+1
347  thisname = makeNewConfigFile(templateFile,comb,index,args.npars)
348  #configFileList.append(thisname)
349  nConfigs = index+1
350  else :
351  nConfigs = len([os.path.join(newFilesDir, f) for f in os.listdir(newFilesDir) if "index" in f])
352 
353  # Begin matrix computation stage, if desired.
354  outRootDir = os.getcwd()+"/NPRootFiles/"
355  outRootFormat = outRootDir+"matrices_{0}.root"
356  if args.matrixStage and not args.investigateSplits :
357 
358  # Rather than run on all consecutive configs up to ntestsmatrices,
359  # we want configs that are not overly similar to each other.
360  # So randomly select (non-repeating) indices between 0 and the number
361  # of configs available, and run on these until the limit is reached.
362  useval = int(args.ntestsmatrices)
363  indicesToUse = random.sample(xrange(0,nConfigs), useval)
364  for index in indicesToUse :
365 
366  # We know what the config matching this index should be.
367  config = templateFile.replace("template","index{0}".format(index))
368 
369  # We have 1 config for every set of NP's we wish to examine. Now
370  # make a correlation matrix for each. This is a batch job!
371  outRootName = outRootFormat.format(index)
372  if doPlots :
373  outPlotName = os.getcwd()+"/NPPlots/matrices_{0}.pdf".format(index)
374  else :
375  outPlotName = "NONE"
376 
377  runMatrixCommands(nominalFile,config,outPlotName,outRootName,index,args.batch)
378 
379  # Begin matrix comparison stage, if desired.
380  if args.comparisonStage and not args.investigateSplits :
381 
382  # Now we need to compare sets of 4 and look for one with a low enough difference.
383  # Have to be chosen from among existing root files *which may or may not be
384  # the same as what was just run* since they get replaced only if needed.
385  rootFiles = [os.path.join(outRootDir, f) for f in os.listdir(outRootDir) if "matrices" in f]
386  indicesAvailable = []
387  for file in rootFiles :
388  index = re.findall(r'\d+', file.split("/")[-1].split("_")[-1])
389  indicesAvailable.append(eval(index[0]))
390 
391  # In special cases you may have a specific combination you know is good and you want to explore
392  # variations rather close to it to see if it can be improved upon.
393  # Call with the combination and look at others which differ by only 1 config:
394  if args.investigateComb :
395  goodComb = tuple([eval(item) for item in args.investigateComb.split(",")])
396  newList = []
397  for index in range(4) :
398  for new in random.sample(indicesAvailable,max(1,int(float(args.ntestscomparisons)/4.0))) :
399  if not new in goodComb :
400  newComb = list(goodComb)
401  newComb[index] = new
402  newList.append(tuple(newComb))
403  pool = tuple(newList)
404 
405  # Default option has no restrictions.
406  else :
407  # There are a crazy number of combinations so just pick a set of those at random
408  # if there are more than 15 of them...
409  random.shuffle(indicesAvailable)
410  if len(indicesAvailable) > 15 :
411  indicesAvailable = indicesAvailable[:15]
412 
413  # Now run on unique combinations up to a cutoff number of tries.
414  # For now this is the same as the number of individual files I created,
415  # with the assumption that my patience for this step is about equal ;)
416  pool = tuple(combinations(indicesAvailable,4))
417  #pool = tuple([tuple([116061,36885,33399,64587])])
418 
419  for indices in random.sample(pool,min(len(pool),args.ntestscomparisons)) :
420  print "Testing combination",indices
421 
422  outFileName = os.getcwd()+"/NPPlots/SR-4D-{0}-{1}-{2}-{3}.png".format(indices[0],indices[1],indices[2],indices[3])
423 
424  runComparisonCommands(outFileName,"NONE",outRootFormat.format(indices[0]),outRootFormat.format(indices[1],outRootFormat.format(indices[2])),outRootFormat.format(indices[2]),outRootFormat.format(indices[3]),"{0}-{1}-{2}-{3}".format(indices[0],indices[1],indices[2],indices[3]),args.batch)
425 
426  # Let's say we have a good combination but now we need
427  # to check out the various options for splitting configs.
428  if args.investigateSplits :
429 
430  configIndices = tuple([eval(item) for item in args.investigateSplits.split(",")])
431  print configIndices
432  splitconfigs = [templateFile.replace("template","index{0}".format(index)) for index in configIndices[1:]]
433  print splitconfigs
434  iterables = []
435  for configindex, config in zip(configIndices[1:],splitconfigs) :
436  newIndices = makeSplitConfigs(config,args.redo_configs)
437  newIterables = []
438 
439  for index in range(newIndices) :
440  outRootName = (outRootFormat.format(configindex)).replace(".root","_splitV{0}.root".format(index))
441  newconfig = config.replace(".config","_splitV{0}.config".format(index))
442  newconfig = newconfig.replace("generateConfigs","splitsConfigs")
443  newIterables.append("{0}_splitV{1}".format(configindex,index))
444 
445  runMatrixCommands(nominalFile,newconfig,"NONE",outRootName,"{0}-{1}".format(configindex,index),args.batch)
446 
447  iterables.append(newIterables)
448 
449  # Wait for matrices to finish
450  isdone = not subprocess.check_output("qstat",shell=True)
451  while not isdone :
452  time.sleep(20)
453  isdone = not subprocess.check_output("qstat",shell=True)
454 
455  #Run comparisons
456  pool = tuple(product([configIndices[0]],iterables[0],iterables[1],iterables[2]))
457 
458  for indices in random.sample(pool,min(len(pool),args.ntestscomparisons)) :
459  print "Testing combination",indices
460 
461  outFileName = os.getcwd()+"/NPPlots/SR-4D-{0}-{1}-{2}-{3}.png".format(indices[0],indices[1],indices[2],indices[3])
462 
463  runComparisonCommands(outFileName,"NONE",outRootFormat.format(indices[0]),outRootFormat.format(indices[1],outRootFormat.format(indices[2])),outRootFormat.format(indices[2]),outRootFormat.format(indices[3]),"{0}-{1}-{2}-{3}".format(indices[0],indices[1],indices[2],indices[3]),args.batch)
464 
465 if __name__ == "__main__":
466  main()
xrange
void xrange(TH1 *h, bool symmetric)
Definition: computils.cxx:515
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
makeStrongReductionTests.runMatrixCommands
def runMatrixCommands(nominalFile, config, outPlotName, outRootName, index, batch)
Definition: makeStrongReductionTests.py:254
makeStrongReductionTests.computeCombinations
def computeCombinations(n, m)
Definition: makeStrongReductionTests.py:58
max
#define max(a, b)
Definition: cfImp.cxx:41
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
makeStrongReductionTests.makeSplitConfigs
def makeSplitConfigs(config, redo=True)
Definition: makeStrongReductionTests.py:116
covarianceTool.filter
filter
Definition: covarianceTool.py:514
makeStrongReductionTests.JESGroup.name
name
Definition: makeStrongReductionTests.py:108
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
makeStrongReductionTests.makeNewConfigFile
def makeNewConfigFile(templateFile, combinations, index, npars)
Definition: makeStrongReductionTests.py:80
makeStrongReductionTests.main
def main()
Definition: makeStrongReductionTests.py:300
DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename T::value_type > sorted(T begin, T end)
Helper function to create a sorted vector from an unsorted one.
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:224
min
#define min(a, b)
Definition: cfImp.cxx:40
makeStrongReductionTests.replace_right
def replace_right(source, target, replacement, replacements=None)
Definition: makeStrongReductionTests.py:113
python.combo.permutations
def permutations(items)
Definition: combo.py:134
makeStrongReductionTests.JESGroup
Definition: makeStrongReductionTests.py:106
python.combo.combinations
def combinations(items, n)
Definition: combo.py:85
Trk::open
@ open
Definition: BinningType.h:40
makeStrongReductionTests.JESGroup.index
index
Definition: makeStrongReductionTests.py:109
makeStrongReductionTests.batchSubmit
def batchSubmit(command, index, phrase)
Definition: makeStrongReductionTests.py:232
makeStrongReductionTests.JESGroup.groupID
groupID
Definition: makeStrongReductionTests.py:110
makeStrongReductionTests.JESGroup.__init__
def __init__(self)
Definition: makeStrongReductionTests.py:107
Muon::print
std::string print(const MuPatSegment &)
Definition: MuonTrackSteering.cxx:28
makeStrongReductionTests.JESGroup.groupingAssigned
groupingAssigned
Definition: makeStrongReductionTests.py:111
readCCLHist.float
float
Definition: readCCLHist.py:83
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
makeStrongReductionTests.runComparisonCommands
def runComparisonCommands(outFileName, plotsName, file1, file2, file3, file4, string, batch)
Definition: makeStrongReductionTests.py:271