5 This module contains helper functions which allow users to manipulate multiple
6 YODA files corresponding to different subsamples of a given process/generator,
7 different sources of theory uncertainty, etc. and combine them, plotting the
8 theory error bands along the way.
10 Can be used either as a standalone executable or the functions can be imported
11 into a custom python script.
13 This module uses a loosely-defined datatype which shall be referred to as
14 AnalysisObject, which is effectively a dict of np.arrays and strings.
15 This encodes the information from a yoda.Scatter2D(), yoda.Histo1D(),
16 root.TGraphAsymmErrors() or root.TH1D() in the same format, and which
17 can be easily manipulated since it consists of np.arrays or strings
18 An AO has the following keys:
20 `ao['name']` the name of the object a la YODA
21 `ao['path']` the path of the object a la YODA
22 `ao['annotations']` a dict of annotations a la YODA
23 `ao['bw']` np.array of widths of each bin
24 `ao['x']` np.array of mid-points of bins
25 `ao['xup']` np.array of high bin edges
26 `ao['xdn']` np.array of low bin edges
27 `ao['y']` np.array of y values
28 `ao['yup']` np.array of y + err_up values
29 `ao['ydn']` np.array of y-err_dn values
30 `ao['nbins']` number of bins
32 This should probably be replaced by a Class in the next version !
34 For a full list of functions and their usage, type:
37 TODO (perspectives for future development):
38 - Also handle ROOT as input/output instead of YODA
39 - Replace AnalysisObject by an actual Class rather than loosely-defined dicts
40 - Implement combineVariationsReplicas()
41 - Support other types in readFromYODA
43 Author: Louie D. Corpe (CERN)
44 Email: l.corpe@cern.ch
46 from random
import randint
51 print(
"[ERROR], looks like the YODA packages are not installed. Please run this command and try again:")
52 print(
"source setupPMGSystematicsTool.sh")
57 print(
"[ERROR], looks like the YAML packages are not installed. Please run this command and try again:")
58 print(
"source setupPMGSystematicsTool.sh")
63 print(
"[ERROR], looks like the LHAPDF packages are not installed. Please run this command and try again:")
64 print(
"source setupPMGSystematicsTool.sh")
69 print(
"[ERROR], looks like the NUMPY packages are not installed. Please run this command and try again:")
70 print(
"source setupPMGSystematicsTool.sh")
76 import readDatabase
as rDB
79 from multiprocessing.dummy
import Pool
as ThreadPool
81 from array
import array
82 from collections
import OrderedDict
90 Renames the files in a given directory, such that the longest common prefix
91 which occurs in all filenames is omitted. Useful if your GRID jobs were
92 submitted with the format <prefix>.<weights_names>.yoda and you want to
93 lose the <prefix>. bit !
97 for fn
in os.listdir(directory):
100 if len(fileList) == 1:
return
102 prefix = os.path.commonprefix(fileList)
103 for fn
in os.listdir(directory):
104 os.system(
"mv %s/%s %s/%s" % (directory, fn, directory, fn.replace(prefix,
"")))
110 `removeExtension` Bool [optional] (remove the file extension, eg .root, or .yoda)
111 `OrderedDict` of replacements to make, or txt file with one replacemement per line
115 if customReps
is None:
return name
116 mydict = OrderedDict()
117 if type(customReps)
is str:
118 f =
open(customReps,
'r')
119 for line
in f.readlines():
120 line = line.replace(
"\n",
"")
121 tokens = line.split(
"-->")
122 mydict[
str(tokens[0])] =
str(tokens[1])
123 elif type(customReps)
is OrderedDict:
126 print(
"[ERROR], customReps must be either OrderedDict or path to a txt file, not ",
type(OrderedDict))
128 name = name.replace(
"user.",
"userp")
129 name = name.replace(
".yoda",
"pyoda")
130 name = name.replace(
"yoda.",
"yodap")
131 name = name.replace(
".root",
"proot")
132 for find, rep
in mydict.items():
133 name = name.replace(find, rep)
134 name = name.replace(
"userp",
"user.")
135 name = name.replace(
"pyoda",
".yoda")
136 name = name.replace(
"yodap",
"yoda.")
137 name = name.replace(
"proot",
".root")
138 if (removeExtension
and ".root" in name): name = name.rpartition(
".root")[0]
139 if (removeExtension
and ".yoda" in name): name = name.rpartition(
".yoda")[0]
146 `removeExtension` Bool [optional] (remove the file extension, eg .root, or .yoda)
149 Converts an input string (eg a weight name) which may contain non-standard
150 characters into a string which is safe to use as a filename.
151 In particular, the following substitutions are made:
156 name = name.replace(
"user.",
"userp")
157 name = name.replace(
".yoda",
"pyoda")
158 name = name.replace(
"yoda.",
"yodap")
159 name = name.replace(
".root",
"proot")
161 name = name.replace(
r"\+",
"")
162 name = name.replace(
"userp",
"user.")
163 name = name.replace(
"pyoda",
".yoda")
164 name = name.replace(
"yodap",
"yoda.")
165 name = name.replace(
"proot",
".root")
166 if (removeExtension
and ".root" in name): name = name.rpartition(
".root")[0]
167 if (removeExtension
and ".yoda" in name): name = name.rpartition(
".yoda")[0]
173 reps[
" nominal "] =
""
174 reps[
" set = "] =
"_"
177 if escapePlus: reps[
"+"] =
r"\+"
185 for find, rep
in reps.items():
186 name = name.replace(find, rep)
195 Takes a LHAPDF ID code and figures out what is the name of the PDF Set
198 lhapdfid =
int(lhapdfid)
199 for a
in lhapdf.availablePDFSets():
200 b = lhapdf.getPDFSet(a)
201 if (lhapdfid >= b.lhapdfID)
and (lhapdfid < (b.lhapdfID + b.size)):
209 f = r.TFile.Open(path)
210 for i
in f.GetListOfKeys():
211 if sumwHistName
in i.GetName():
212 return f.Get(i.GetName()).Integral()
214 aos = yoda.read(path, patterns=
'.*/_EVTCOUNT.*')
215 if '/_EVTCOUNT' not in aos.keys():
217 elif '/_EVTCOUNT[' in aos.keys():
218 return aos[
'/_EVTCOUNT[%s]' % nominalWeight].sumW()
220 return aos[
'/_EVTCOUNT'].sumW()
223 def weightCorrection(var, nom, sampleDir="", varWeightName="", nominalWeightName="", sumwHistName=""):
225 `var` String (name of YODA file for variation to get the correction for)
226 `nom` String (name of YODA file for nominal to get correct away from)
227 `sampleDir` String [optional] (directory path for a given subsample)
228 `varWeightName` String [optional] (name of the variation to get weight correction for)
229 `nominalWeightName` String [optional] (name of the nominal variation to get weight correction with respect to)
230 `sumwHistName` String [optional] (name of the sum-of-weights TH1 in the case of root files)
233 Computes the on-the-fly weight correction of the systematic variation with
234 respect to the nominal. This is needed for YODA files because by default,
235 Rivet normalises by the sum of weights for a given instance (ie variation)
236 rather than by the nominal. This weight is used to fix the issue.
242 fn[
'var'] =
'%s' % (var)
243 fn[
'nom'] =
'%s' % (nom)
244 for k, v
in fn.items():
245 if not os.path.isfile(fn[k]):
246 fn[k] = fn[k] +
'.yoda'
247 if not os.path.isfile(fn[k]):
248 fn[k] = fn[k].
replace(
'.yoda',
'.root')
249 if not os.path.isfile(fn[k]):
250 print(
"\n[ERROR] Could not open the file:" +
repr(fn[k]))
251 print(
"[ERROR] Available files in " +
repr(sampleDir))
252 for varFile
in os.listdir(sampleDir):
263 `path` String (path to YODA file)
266 if '/_XSEC' not in yoda.read(path, patterns=
'.*/_XSEC.*').
keys():
269 return yoda.read(path, patterns=
'.*/_XSEC.*')[
'/_XSEC'].
points()[0].x()
272 def getXS(dsid, campaign=15, userFile=None):
274 `dsid` Int (dataset ID of the ATLAS dataset you want to get the
276 `campaign` String [optional] (the MC campaign number 15 for mc15_13TeV
277 datasets, 16 for mc16_13TeV. If campaign > 16 look instead for a local
278 file `data/PMGxsecDB_manual.txt` where the user can input the XS info)
281 Fetch the cross-section (XS) of the dataset whose ID you specified.
282 This function checks for the DSID in the central PMG XS database on cvmfs
283 but by setting campaign = 999 you can force it to check in a local file:
284 PMGxsecDB_manual.txt where the user can input the XS info manually.
286 if userFile
is not None:
289 if campaign > 16
or campaign < 15:
290 dataDir = os.environ[
"SYSTTOOLSPATH"] +
"/data/"
292 pmgXSFile =
"%s/PMGxsecDB_manual.txt" % dataDir
296 pmgXSFile =
"/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc%d.txt" % campaign
301 thisSampleFinalCrossSection = 0.
302 thisSampleKFactorUncertainty = 0.
304 thisSampleKFactor = 0
307 pmgXSTree = r.TTree()
308 pmgXSTree.ReadFile(pmgXSFile)
309 for dsidLine
in pmgXSTree:
310 if int(dsid) ==
int(dsidLine.dataset_number):
311 thisSampleCrossSection = dsidLine.crossSection
312 thisSampleFilterEff = dsidLine.genFiltEff
313 thisSampleKFactor = dsidLine.kFactor
315 thisSampleKFactorUncertainty = (dsidLine.relUncertUP + dsidLine.relUncertDOWN) * 0.5
317 thisSampleFinalCrossSection = 0
318 thisSampleFinalCrossSection = thisSampleCrossSection * thisSampleFilterEff * thisSampleKFactor
320 if dsidLine.kFactor != 1: hasKFactor = 1
325 print(
"[WARNING] could not find " +
str(dsid) +
" in " + pmgXSFile +
" for campaign " +
str(campaign))
327 print(
"[ERROR] could not find " +
str(dsid) +
" in " + pmgXSFile)
328 print(
"--> you'll need to add it in manually before continuing")
330 else: thisSampleFinalCrossSection, hasKFactor, thisSampleKFactorUncertainty =
getXS(dsid, campaign + 1, userFile)
332 return thisSampleFinalCrossSection, thisSampleKFactor, thisSampleKFactorUncertainty
def safeDiv(numerator, denominator):
    """
    `numerator` np.array()
    `denominator` np.array()
    `return` np.array()

    Element-wise division that avoids NaNs: the division is performed only
    'where' the denominator is non-zero. For indices where the denominator
    is zero, 0 is placed in the output array instead.
    """
    # NOTE: the output buffer must be float. A plain np.zeros_like(numerator)
    # inherits the numerator's dtype, so integer inputs would make np.divide
    # raise a casting error when writing float quotients into an int buffer.
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator, dtype=float),
        where=denominator != 0,
    )
    return ratio
350 `operand` String (The operand which you want to decompose into chunks)
351 `bracket` String [optional] (The type of bracket you want to avoid splitting,
353 `return` list(String)
355 Splits the operand of a formula into comma-separated chunks without splitting
356 operands of nested functions.
357 eg: Function1(foo, Function2(bar, foo), Function3(foo, Function4 (bar, foo)))
358 --> [foo, Function2(bar, foo), Function3(foo, Function4 (bar, foo))]
365 for c
in (operand +
","):
366 if ((c ==
"," and not isQuote)
or c == bracket[1])
and bracketLevel == 0:
367 parts.append(
"".
join(current).strip(
'"'))
373 elif c == bracket[1]:
377 isQuote =
not isQuote
380 if isQuote: current.append(c)
390 `return` list(String)
392 Take this formula written in this string and recursively obtain a list of
393 basic components which it needs eg:
394 Function1(foo, Function2(bar, foo), Function3(foo, Function4 (bar, foo)))
395 --> [foo, bar, foo, foo, bar, foo]
398 if ")" not in formula:
406 operation = formula.partition(
"(")[0]
407 operand = formula.partition(
"(")[-1].rpartition(
")")[0]
408 if 'Value' != operation:
416 `nominal` AnalysisObject (nominal object, which gives us the central values.
417 The type is a dict of np.arrays() encoding information equivalent
418 to a Scatter2D, Histo1D, TGraphAsymmErrors or TH1D. See module description
419 for more information about this format. TODO: dedicated class for this?)
420 `formula` String (The formula with which to combine the components)
421 `componentsMap` dict(filenames, AnalysisObjects) (this is a map between the
422 file which the component corresponds to and the AnalysisObject which it
424 `level` Int [optional] (keeps track of how deep in the recursion we have gone)
425 `verbose` Int [optional] (0 or 1, whether or not to print(a lot of debug messages) )
426 `return` AnalysisObject
428 Resolves the formula iteratively, using the AnalysisObjects listed in the
429 componentsMap. resolveFormula supports the following functions:
431 `Central(arg1)`: Use the central value of arg1, and set errors to 0
433 `DownUpNominal(arg0, arg1, arg2)`: Produce a new AO where ydn is taken from
434 the central value ('y') of arg0, yup is taken from the central value of arg1,
435 and y is taken from the central value of arg2.
437 `Envelope(arg1,..., argN)`: Take the min/max envelope of the arguments as the
438 up/down errors of the resulting AO. The central value is taken from 'nominal'.
440 `Inverse(arg1)`: Take the inverse of the AO, so y--> 1/y, and the errors are
441 propagated as yerr --> |yerr/y **2|
443 `Product(arg1,..., argN)`: Take the product of the arguments of the function.
444 The 'y' value is the product of the 'y' values of the arguments, the errors
445 are propagated according the relative errors in quadrature.
448 `Average(arg1,..., argN)`: Take the average of the arguments of the function.
449 The 'y' value is the average of the 'y' values of the arguments, the errors
450 are taken from the first argument.
452 `QuadSum(arg1,.., argN)`: Sum in quadrature of the errors of the arguments,
453 and central value taken from nominal AO.
455 `Value(arg1)`: Used to scale an AO and its errors by the float specified in arg1.
457 `CopyError(arg1, arg2)`: Copy the up/down errors.
462 y, yup, ydn =
'y',
'yup',
'ydn'
463 if 'z' in nominal.keys():
464 y, yup, ydn =
'z',
'zup',
'zdn'
465 for i
in range(level): prefix +=
"-->"
466 if (verbose):
print(prefix +
" processing " + formula)
470 if ")" not in formula:
474 for k, v
in componentsMap.items():
475 componentName = k.split(
"/")[-1].
replace(
".yoda",
"").
replace(
".root",
"")
479 if componentName.split(
":")[0] ==
safeFileName(formula):
483 print(
"[ERROR] could not find a match for " + formula)
485 for k, v
in componentsMap.items():
486 print(k.split(
"/")[-1])
490 if (verbose):
print(prefix +
"--> result:" + result)
498 operation = formula.partition(
"(")[0]
499 operand = formula.partition(
"(")[-1].rpartition(
")")[0]
505 if operation ==
"Value":
506 arguments = [
float(operand)]
512 argumentNames += [argument]
514 if (verbose):
print(prefix +
"argument of" + operation +
": " + arg)
522 if operation ==
"QuadSum":
524 for argument
in arguments:
525 if result
is None: result = copy.deepcopy(argument)
527 if not (np.allclose(nominal[y], argument[y])):
528 print(
"[ERROR] trying to take the quadratic sum of uncertainties which don't have the same central value!")
529 print(
"If you are using an alternative PDF directly int his function, please take the difference with the nominal and use that instead!")
531 result[yup] = nominal[y] + np.sqrt((argument[yup] - nominal[y]) ** 2 + (result[yup] - nominal[y]) ** 2)
532 result[ydn] = nominal[y] - np.sqrt((argument[ydn] - nominal[y]) ** 2 + (result[ydn] - nominal[y]) ** 2)
535 elif operation ==
"Central":
536 result = copy.deepcopy(arguments[0])
537 result[yup] = result[y]
538 result[ydn] = result[y]
543 elif operation ==
"DownUpNominal":
544 if len(arguments) != 3:
545 print(
"[ERROR] DownUpNominal() must take exactly three arguments, found " + len(arguments))
547 result = copy.deepcopy(arguments[2])
548 result[yup] = copy.deepcopy(arguments[1][y])
549 result[ydn] = copy.deepcopy(arguments[0][y])
554 elif operation ==
"Product":
556 for arg
in arguments:
557 if result
is None: result = copy.deepcopy(arg)
559 centralProduct = result[y] * arg[y]
560 upError = centralProduct * np.sqrt((
safeDiv(result[y] - result[yup], result[y])) ** 2 + (
safeDiv(arg[y] - arg[yup], arg[y])) ** 2)
561 dnError = centralProduct * np.sqrt((
safeDiv(result[y] - result[ydn], result[y])) ** 2 + (
safeDiv(arg[y] - arg[ydn], arg[y])) ** 2)
562 result[y] = centralProduct
563 result[yup] = centralProduct + upError
564 result[ydn] = centralProduct - dnError
565 elif operation ==
"Average":
568 for arg
in arguments:
569 if result
is None: result = copy.deepcopy(arg)
571 result[y] = result[y] + arg[y]
572 result[y] = result[y] / N
576 elif operation ==
"Envelope":
578 for i
in range(len(arguments)):
579 argumentDict[argumentNames[i]] = arguments[i]
581 result = copy.deepcopy(nominal)
585 elif operation ==
"EnvelopeIgnoreErrors":
587 for i
in range(len(arguments)):
588 argumentDict[argumentNames[i]] = arguments[i]
590 result = copy.deepcopy(nominal)
597 elif operation ==
"Inverse":
598 if len(arguments) != 1:
599 print(
"[ERROR] Inverse() must take exactly 1 argument, found " + len(arguments))
602 result = copy.deepcopy(arg)
603 result[y] =
safeDiv(np.ones(len(arg[y])), arg[y])
604 result[yup] = result[y] + np.abs(
safeDiv(arg[y] - arg[yup], arg[y] ** 2))
605 result[ydn] = result[y] - np.abs(
safeDiv(arg[y] - arg[ydn], arg[y] ** 2))
607 elif operation ==
"Value":
608 if len(arguments) != 1:
609 print(
"[ERROR] Value() must take exactly 1 argument, found " + len(arguments))
612 result = copy.deepcopy(nominal)
613 result[y] = arg * np.ones(len(result[y]))
614 result[yup] = arg * np.zeros(len(result[y]))
615 result[ydn] = arg * np.zeros(len(result[y]))
620 print(
"[ERROR] this operation" + operation +
" is not yet supported! Please implement it in resolveFormula() before continuing")
629 `nom` AnalysisObject (of the nominal variation)
630 `variations` dict(String, AnalysisObject)
631 `pset` LHAPDF PDFSet (obtained from lhapdf.getPDFSet(pdfSetName))
632 `asym` Bool [optional] (return asymmetric errors? Or symmetrize?)
633 `return` AnalysisObject
635 Combines PDF variations according to the LHAPDF PDFSet prescription.
638 if 'z' in nom.keys(): y =
'z'
640 pdfsyst = np.array(nom_y)
643 for k
in sorted(variations.keys()):
645 if np.allclose(nom_y, var[y]):
646 variationsArray = [var[y]] + variationsArray
648 variationsArray.append(var[y])
649 if len(variationsArray) != pset.size:
650 print(
"[WARNING], there are not enough weights (%d) to evaluate this PDFSET (should have %d), skipping." % (len(variationsArray), pset.size))
652 pdfvars = np.asarray(variationsArray)
653 pdfcen = np.array([pset.uncertainty(pdfvars[:, i]).central
for i
in range(nbins)])
654 pdfup = np.array([pset.uncertainty(pdfvars[:, i]).errplus
for i
in range(nbins)])
655 pdfdn = np.array([pset.uncertainty(pdfvars[:, i]).errminus
for i
in range(nbins)])
656 pdfsym = np.array([pset.uncertainty(pdfvars[:, i]).errsymm
for i
in range(nbins)])
657 pdfrel =
safeDiv(pdfsym, pdfcen)
658 pdfrelup =
safeDiv(pdfup, pdfcen)
659 pdfreldn =
safeDiv(pdfdn, pdfcen)
660 pdfsyst = pdfrel * nom_y
661 pdfsystup = pdfrelup * nom_y
662 pdfsystdn = pdfreldn * nom_y
663 if asym:
return (nom_y, nom_y - pdfsystdn, nom_y + pdfsystup)
664 else:
return (nom_y, nom_y - pdfsyst, nom_y + pdfsyst)
669 `nom` AnalysisObject (of the nominal variation)
670 `variations` dict(String, AnalysisObject)
671 `asym` Bool [optional] (return asymmetric errors? Or symmetrize?)
672 `includeErrorsInEnvelope` Bool [optional]
673 `return` AnalysisObject
675 Take the min/max envelope of the arguments as the up/down errors of the
676 resulting AO. The central value is taken from 'nominal'.
678 y, yup, ydn =
'y',
'yup',
'ydn'
679 if 'z' in nom.keys():
680 y, yup, ydn =
'z',
'zup',
'zdn'
681 systup = np.array(nom[y])
682 systdn = np.array(nom[y])
683 for var
in variations.values():
684 if includeErrorsInEnvelope:
685 systup = map(max, zip(systup, var[yup], var[ydn]))
686 systdn = map(min, zip(systdn, var[yup], var[ydn]))
688 systup = map(max, zip(systup, var[y]))
689 systdn = map(min, zip(systdn, var[y]))
691 var_tmp = 0.5 * (abs(nom[y] - systup) + abs(nom[y] - systdn))
692 systup = nom[y] + var_tmp
693 systdn = nom[y] - var_tmp
694 return np.array(nom[y]), np.array(systdn), np.array(systup)
699 `nom` AnalysisObject (of the nominal variation)
700 `variations` dict(String, AnalysisObject)
701 `asym` Bool [optional] (return asymmetric errors? Or symmetrize?)
702 `return` AnalysisObject
704 Takes the standard deviation of the variations... in principle
705 This is a placeholder for now... needs to be implemented TODO
707 return np.array(nom[
'y']), np.array(nom[
'y']), np.array(nom[
'y'])
712 `nom` AnalysisObject (of the nominal variation)
713 `variations` dict(String, AnalysisObject)
714 `return` AnalysisObject
716 Get the alphaS variation. This is just a special case of Envelope
717 where the error is symmetrized!
724 `nom` AnalysisObject (of the nominal variation)
725 `variations` dict(String, AnalysisObject)
726 `asym` Bool [optional] (return asymmetric errors? Or symmetrize?)
727 `return` AnalysisObject
729 Combine the specified variations according to the Hessian prescription
730 Central value given by nom.
733 if 'z' in nom.keys(): y =
'z'
735 systup = np.array(nom_y)
736 systdn = np.array(nom_y)
737 sumsq = np.array(nom_y) - np.array(nom_y)
738 for var
in variations:
739 sumsq += (nom_y - var) ** 2
740 systup += np.sqrt(sumsq)
741 systdn -= np.sqrt(sumsq)
743 return nom_y, systdn, systup
748 `nom` AnalysisObject (of the nominal variation)
749 `variations` dict(String, AnalysisObject)
750 `formula` String (custom formula to combine uncertainties)
751 `return` AnalysisObject
753 Combine the specified variations according to custom formula given by formula
755 y, yup, ydn =
'y',
'yup',
'ydn'
756 if 'z' in nom.keys():
757 y, yup, ydn =
'z',
'zup',
'zdn'
759 return output[y], output[ydn], output[yup]
764 `nom` AnalysisObject (of the nominal variation)
765 `return` AnalysisObject
767 Dummy function which currently just grabs the dn/up errs as the stat errs.
769 y, yup, ydn =
'y',
'yup',
'ydn'
770 if 'z' in nom.keys():
771 y, yup, ydn =
'z',
'zup',
'zdn'
772 return nom[y], nom[ydn], nom[yup]
777 `nom` AnalysisObject (of the nominal variation)
778 `val` relative size of the normalisation uncertainty
779 `return` AnalysisObject
781 Apply a k-factor normalization uncertainty
784 if 'z' in nom.keys():
786 return nom[y], nom[y] * (1 - val), nom[y] * (1 + val)
791 `filename` String (path to file which is to be read)
792 `regexFilter` String [optional] (AOs whose names match regex are processed)
793 `regexVeto` String [optional](AOs whose names match regex are NOT processed)
794 `return` dict{String, AnalysisObject}
796 decides whether to process a file as ROOT or YODA depending on the file extension
798 rootExtensions = [
'.root',
'.root1',
'.root2']
799 yodaExtensions = [
'.yoda',
'.yoda1',
'.yoda.gz']
800 if filename
is None:
return {}
801 if filename ==
"":
return {}
802 filenameSplit = filename.split(
":")
803 for rex
in rootExtensions:
804 if filenameSplit[0].endswith(rex):
806 for ye
in yodaExtensions:
807 if filenameSplit[0].endswith(ye):
809 print(
"[ERROR] could not identify input file type of %s ! please name your files with one of the following extensions:" %
repr(filenameSplit))
810 print(
"[ERROR]" +
repr(rootExtensions) +
" for ROOT files")
811 print(
"[ERROR]" +
repr(yodaExtensions) +
" for YODA files")
817 `d` TDirectory or TFile (name of file or directory to be read)
818 `basepath` String [optional] (absolute path leading to `d` in the file)
819 `return` list[String, AnalysisObject]
821 Recursively gets the list of Histogram/TGraph names inside a ROOT file directory (supports nested directories)
823 for key
in d.GetListOfKeys():
824 kname = key.GetName()
826 if d.Get(kname).
ClassName() ==
"TList":
continue
827 if d.Get(kname).
ClassName() ==
"TTree":
continue
828 for i
in getFileKeys(d.Get(kname), basepath + kname +
"/"):
831 yield basepath + kname, d.Get(kname)
836 `filename` String (path to file which is to be read)
837 `regexFilter` String [optional] (AOs whose names match regex are processed)
838 `regexVeto` String [optional](AOs whose names match regex are NOT processed)
839 `return` dict{String, AnalysisObject}
841 Open a ROOT file, read the contents and return them as AnalysisObjects.
842 Control which AOs to select/reject using the optional regexFilter/regexVeto
844 Only supports TH1D and TGraphAsymmErrors types for now. TODO Support other types.
847 filename = filename.split(
":")
848 rootfile = r.TFile.Open(filename[0])
852 if len(filename) > 1:
853 if '/' in filename[1]:
854 path = filename[1].
split(
"/")[0]
855 tfd = rootfile.Get(path)
856 for aoName
in tfd.GetListOfKeys():
857 if debug:
print(
"DEBUG open ao:", aoName)
859 aoName = aoName.GetName()
861 aoName = path +
"/" + aoName.GetName()
862 if debug:
print(
"DEBUG modified name for ao:", aoName)
864 if debug:
print(
"DEBUG regex filte =%s, veto=%s" % (regexFilter, regexVeto))
865 if (regexFilter
is not None):
866 if debug:
print(
"DEBUG re.findall(regexFilter, aoName))", re.findall(regexFilter, aoName))
867 if (aoName
not in re.findall(regexFilter, aoName)):
868 if debug:
print(
"DEBUG ao is not in finall: continue")
870 if (regexVeto
is not None):
871 if debug:
print(
"DEBUG re.findall(regexVeto, aoName))", re.findall(regexVeto, aoName))
872 if (aoName
in re.findall(regexVeto, aoName)):
873 if debug:
print(
"DEBUG ao is in finall: continue")
877 if debug:
print(
"DEBUG start AONAME replacement, filename", filename)
878 if len(filename) > 1:
879 rep = filename[1].
replace(
"!AONAME",
"")
880 filt = filename[1].
replace(
"!AONAME",
".*")
881 capt = filename[1].
replace(
"!AONAME",
"(.*)")
882 if debug:
print(
"DEBUG AONAME replacement. rep=", rep,
"filt=", filt,
"capt=", capt)
883 if debug:
print(
"DEBUG AONAME re.findall(filt, aoName)", re.findall(filt, aoName))
884 if (aoName
not in re.findall(filt, aoName)):
885 if debug:
print(
"DEBUG AONAME not in re.findall(filt, aoName): continue")
888 newAoName = re.findall(capt, aoName)[0]
890 newAoName = aoName.replace(rep,
"")
891 ao = rootfile.Get(aoName)
895 aoResult[
'name'] = aoName
896 aoResult[
'path'] = filename[0] +
'/' + aoName
898 binRange =
range(1, ao.GetNbinsX() + 1)
899 aoResult[
'info'] =
'Was_a_TH1'
900 aoResult[
'bw'] = np.array([ao.GetBinWidth(b)
for b
in binRange])
901 aoResult[
'y'] = np.array([ao.GetBinContent(b)
for b
in binRange])
902 stat = np.array([ao.GetBinError(b)
for b
in binRange])
903 aoResult[
'yup'] = (aoResult[
'y'] + stat)
904 aoResult[
'ydn'] = (aoResult[
'y'] - stat)
905 aoResult[
'x'] = np.array([ao.GetBinCenter(b)
for b
in binRange])
906 aoResult[
'xup'] = np.array([ao.GetBinLowEdge(b + 1)
for b
in binRange])
907 aoResult[
'xdn'] = np.array([ao.GetBinLowEdge(b)
for b
in binRange])
908 aoResult[
'nbins'] = len(binRange)
910 xbinRange =
range(1, ao.GetNbinsX() + 1)
911 ybinRange =
range(1, ao.GetNbinsY() + 1)
912 aoResult[
'info'] =
'Was_a_TH2'
913 aoResult[
'xbw'] = np.array([ao.GetXaxis().GetBinWidth(b)
for b
in xbinRange
for c
in ybinRange])
914 aoResult[
'ybw'] = np.array([ao.GetYaxis().GetBinWidth(c)
for b
in xbinRange
for c
in ybinRange])
915 aoResult[
'z'] = np.array([ao.GetBinContent(b, c)
for b
in xbinRange
for c
in ybinRange])
916 stat = np.array([ao.GetBinError(b, c)
for b
in xbinRange
for c
in ybinRange])
917 aoResult[
'zup'] = (aoResult[
'z'] + stat)
918 aoResult[
'zdn'] = (aoResult[
'z'] - stat)
919 aoResult[
'x'] = np.array([ao.GetXaxis().GetBinCenter(b)
for b
in xbinRange
for c
in ybinRange])
920 aoResult[
'xup'] = np.array([ao.GetXaxis().GetBinLowEdge(b + 1)
for b
in xbinRange
for c
in ybinRange])
921 aoResult[
'xdn'] = np.array([ao.GetXaxis().GetBinLowEdge(b)
for b
in xbinRange
for c
in ybinRange])
922 aoResult[
'y'] = np.array([ao.GetYaxis().GetBinCenter(c)
for b
in xbinRange
for c
in ybinRange])
923 aoResult[
'yup'] = np.array([ao.GetYaxis().GetBinLowEdge(c + 1)
for b
in xbinRange
for c
in ybinRange])
924 aoResult[
'ydn'] = np.array([ao.GetYaxis().GetBinLowEdge(c)
for b
in xbinRange
for c
in ybinRange])
925 aoResult[
'nbinsx'] = len(xbinRange)
926 aoResult[
'nbinsy'] = len(ybinRange)
927 elif type(r.TGraphAsymmErrors()) ==
type(ao):
928 binRange =
range(0, ao.GetN())
929 aoResult[
'info'] =
'Was_a_TG'
930 aoResult[
'bw'] = np.array([ao.GetErrorXlow(b) + ao.GetErrorXhigh(b)
for b
in binRange])
931 aoResult[
'y'] = np.array([ao.GetY()[b]
for b
in binRange])
932 aoResult[
'yup'] = np.array([ao.GetY()[b] + ao.GetErrorYhigh(b)
for b
in binRange])
933 aoResult[
'ydn'] = np.array([ao.GetY()[b] - ao.GetErrorYlow(b)
for b
in binRange])
934 aoResult[
'x'] = np.array([ao.GetX()[b]
for b
in binRange])
935 aoResult[
'xup'] = np.array([ao.GetX()[b] + ao.GetErrorXhigh(b)
for b
in binRange])
936 aoResult[
'xdn'] = np.array([ao.GetX()[b] - ao.GetErrorXlow(b)
for b
in binRange])
937 aoResult[
'nbins'] = len(binRange)
939 aoResult[
'tree'] = ao.GetTree()
942 result[aoName] = aoResult
944 print(
"[ERROR] no analysis objects passed your filter, or your input files are empty of TH1 or TGraph objects. Exiting")
951 `filename` String (path to file which is to be read)
952 `regexFilter` String [optional] (AOs whose names match regex are processed)
953 `regexVeto` String [optional](AOs whose names match regex are NOT processed)
954 `return` dict{String, AnalysisObject}
956 Open a YODA file, read the contents and return them as AnalysisObjects.
957 Control which AOs to select/reject using the optional regexFilter/regexVeto
961 filename = filename.split(
":")
962 patterns =
"" if regexFilter
is None else '.*' + regexFilter +
'.*'
963 unpatterns = []
if regexVeto
is None else [
'.*' + regexVeto +
'.*']
964 if patterns ==
"" and len(filename) > 1:
966 unpatterns += [
"RAW/.*",
"/REF/.*"]
967 histList = yoda.read(filename[0], patterns=patterns, unpatterns=unpatterns)
968 for aoName, ao
in histList.items():
970 if (regexFilter
is not None):
971 if (aoName
not in re.findall(regexFilter, aoName)):
continue
972 if (regexVeto
is not None):
973 if (aoName
in re.findall(regexVeto, aoName)):
continue
976 if len(filename) > 1:
977 rep = filename[1].
replace(
"!AONAME",
"")
978 filt = filename[1].
replace(
"!AONAME",
".*")
980 if (aoName
not in re.findall(filt, aoName)):
continue
981 aoName = aoName.replace(rep,
"")
984 aoResult[
'name'] = ao.name().
replace(rep,
"")
985 aoResult[
'path'] = ao.path().
replace(rep,
"")
986 aoResult[
'fname'] = filename
987 aoResult[
'annotations'] = {ann: ao.annotation(ann)
for ann
in ao.annotations()}
988 aoResult[
'annotations'][
'Path'] = aoResult[
'path']
989 if type(yoda.Histo1D()) ==
type(ao):
990 aoResult[
'bw'] = np.array([b.xWidth()
for b
in ao.bins()])
991 aoResult[
'y'] = np.array([b.sumW()
for b
in ao.bins()]) / aoResult[
'bw']
992 stat = np.array([b.sumW2()
for b
in ao.bins()]) / (aoResult[
'bw']) ** 2
993 aoResult[
'yup'] = (aoResult[
'y'] + np.sqrt(stat))
994 aoResult[
'ydn'] = (aoResult[
'y'] - np.sqrt(stat))
995 aoResult[
'x'] = np.array([b.xMid()
for b
in ao.bins()])
996 aoResult[
'xup'] = np.array([b.xMax()
for b
in ao.bins()])
997 aoResult[
'xdn'] = np.array([b.xMin()
for b
in ao.bins()])
998 aoResult[
'nbins'] = len(ao.bins())
999 elif type(yoda.Scatter2D()) ==
type(ao):
1000 aoResult[
'bw'] = np.array([b.yErrs()[1] + b.yErrs()[0]
for b
in ao.points()])
1001 aoResult[
'y'] = np.array([b.y()
for b
in ao.points()])
1002 aoResult[
'yup'] = np.array([b.y() + b.yErrs()[1]
for b
in ao.points()])
1003 aoResult[
'ydn'] = np.array([b.y() - b.yErrs()[0]
for b
in ao.points()])
1004 aoResult[
'x'] = np.array([b.x()
for b
in ao.points()])
1005 aoResult[
'xup'] = np.array([b.x() + b.xErrs()[1]
for b
in ao.points()])
1006 aoResult[
'xdn'] = np.array([b.x() - b.xErrs()[0]
for b
in ao.points()])
1007 aoResult[
'nbins'] = len(ao.points())
1008 elif type(yoda.Histo2D()) ==
type(ao):
1009 aoResult[
'xbw'] = np.array([b.xWidth()
for b
in ao.bins()])
1010 aoResult[
'ybw'] = np.array([b.yWidth()
for b
in ao.bins()])
1011 aoResult[
'z'] = np.array([b.sumW()
for b
in ao.bins()]) / (aoResult[
'xbw'] * aoResult[
'ybw'])
1012 stat = np.array([b.sumW2()
for b
in ao.bins()]) / ((aoResult[
'xbw'] * aoResult[
'ybw'])) ** 2
1013 aoResult[
'zup'] = (aoResult[
'z'] + np.sqrt(stat))
1014 aoResult[
'zdn'] = (aoResult[
'z'] - np.sqrt(stat))
1015 aoResult[
'x'] = np.array([b.xMid()
for b
in ao.bins()])
1016 aoResult[
'xup'] = np.array([b.xMax()
for b
in ao.bins()])
1017 aoResult[
'xdn'] = np.array([b.xMin()
for b
in ao.bins()])
1018 aoResult[
'y'] = np.array([b.yMid()
for b
in ao.bins()])
1019 aoResult[
'yup'] = np.array([b.yMax()
for b
in ao.bins()])
1020 aoResult[
'ydn'] = np.array([b.yMin()
for b
in ao.bins()])
1021 aoResult[
'nbinsx'] = ao.numBinsX()
1022 aoResult[
'nbinsy'] = ao.numBinsY()
1025 result[aoName] = aoResult
1027 if len(result) == 0:
1028 print(
"[ERROR] no analysis objects passed your filter, or your input files are empty of Histo1D or Scatter2D objects. Exiting")
1029 print(
"[ERROR] FYI: filename =%s, filter =%s, veto =%s " % (filename, regexFilter, regexVeto))
1037 `histDict` dict{String, AnalysisObject} (AOs to write out)
1038 `fOut` String (output file to write to)
1041 Write AOs out to a file, auto-determines root or yoda from file extension of fOut.
1043 rootExtensions = [
'.root',
'.root1',
'.root2']
1044 yodaExtensions = [
'.yoda',
'.yoda1',
'.yoda.gz']
1045 for rex
in rootExtensions:
1046 if fOut.endswith(rex):
1049 for ye
in yodaExtensions:
1050 if fOut.endswith(ye):
1053 print(
"[ERROR] could not identify input file type to write to ! Assuming YODA.")
1060 `histDict` dict{String, AnalysisObject} (AOs to write out)
1061 `fOut` String (output file to write to)
1064 Write AOs out to a YODA file.
1067 for aoKey, inAO
in histDict.items():
1068 if 'z' in inAO.keys():
continue
1069 outAO = yoda.Scatter2D()
1070 for k, v
in inAO[
'annotations'].
items():
1071 if k ==
"Type":
continue
1072 outAO.setAnnotation(k, v)
1073 for i
in range(inAO[
'nbins']):
1075 p.setX(inAO[
'x'][i])
1076 p.setY(inAO[
'y'][i])
1077 p.setErrs(1, (abs(inAO[
'xdn'][i] - inAO[
'x'][i]), abs(inAO[
'xup'][i] - inAO[
'x'][i])))
1078 p.setErrs(2, (abs(inAO[
'ydn'][i] - inAO[
'y'][i]), abs(inAO[
'yup'][i] - inAO[
'y'][i])))
1080 yoda.writeYODA(outAO, f)
1086 `histDict` dict{String, AnalysisObject} (AOs to write out)
1087 `fOut` String (output file to write to)
1090 Write AOs out to a ROOT file.
1092 f = r.TFile.Open(fOut,
"RECREATE")
1093 for aoKey, inAO
in histDict.items():
1094 if 'z' not in inAO.keys():
1095 outAO = r.TGraphAsymmErrors()
1096 outAO.SetName(aoKey)
1097 outAO.SetTitle(aoKey)
1098 if len(inAO[
'xdn']): binningInfo = [inAO[
'xdn'][0]]
1100 for xup
in inAO[
'xup']: binningInfo += [xup]
1101 binArray =
array(
'd', binningInfo)
1102 outAO_up = r.TH1D(aoKey +
"__1up", aoKey +
"__1up", len(binArray) - 1, binArray)
1103 outAO_down = r.TH1D(aoKey +
"__1down", aoKey +
"__1down", len(binArray) - 1, binArray)
1104 outAO.SetName(aoKey)
1105 outAO.SetTitle(aoKey)
1106 for i
in range(inAO[
'nbins']):
1107 outAO.SetPoint(i, inAO[
'x'][i], inAO[
'y'][i])
1108 outAO.SetPointError(i, inAO[
'x'][i] - inAO[
'xdn'][i], inAO[
'xup'][i] - inAO[
'x'][i], abs(inAO[
'y'][i] - inAO[
'ydn'][i]), abs(inAO[
'yup'][i] - inAO[
'y'][i]))
1109 outAO_up.SetBinContent(i + 1, inAO[
'yup'][i])
1110 outAO_up.SetBinError(i + 1, 0.)
1111 outAO_down.SetBinContent(i + 1, inAO[
'ydn'][i])
1112 outAO_down.SetBinError(i + 1, 0.)
1120 for y
in inAO[
'ydn']:
1121 while y
not in ybins:
1123 for x
in inAO[
'xdn']:
1124 while x
not in xbins:
1126 xbins.append(inAO[
'xup'][np.where(inAO[
'xdn'] == x)][0])
1127 ybins.append(inAO[
'yup'][np.where(inAO[
'ydn'] == y)][0])
1128 xbinArray =
array(
'd', xbins)
1129 ybinArray =
array(
'd', ybins)
1130 outAO = r.TH2D(aoKey,
"histo", len(xbinArray) - 1, xbinArray, len(ybinArray) - 1, ybinArray)
1132 outAO.SetName(aoKey)
1133 outAO.SetTitle(aoKey)
1134 for iBin
in range(len(inAO[
'z'])):
1135 ix = inAO[
'x'][iBin]
1136 iy = inAO[
'y'][iBin]
1137 xybin = outAO.GetBin(outAO.GetXaxis().
FindBin(ix), outAO.GetYaxis().
FindBin(iy))
1138 outAO.SetBinContent(xybin, inAO[
'z'][iBin])
1139 outAO.SetBinError(xybin, abs(inAO[
'z'][iBin] - inAO[
'zup'][iBin]))
1146 `fIn` String (input file to evaluate)
1147 `regexFilter` String [optional] (AOs whose names match regex are processed)
1148 `regexVeto` String [optional] (AOs whose names match regex are NOT processed)
1151 Get a rough estimate of the average symmetric relative error per bin, used to
1152 determine what order to plot the uncertainties in.
1154 averageUncertaintySizePerBin =
None
1156 for ao
in nominalHists.keys():
1158 if (regexFilter
is not None):
1159 if (ao
not in re.findall(regexFilter, ao)):
continue
1160 if (regexVeto
is not None):
1161 if (ao
in re.findall(regexVeto, ao)):
continue
1162 noms = nominalHists[ao]
1163 y, yup, ydn =
'y',
'yup',
'ydn'
1164 if 'z' in noms.keys():
1165 y, yup, ydn =
'z',
'zup',
'zdn'
1167 relativeError =
safeDiv(0.5 * (abs(noms[yup] - noms[y]) + abs(noms[ydn] - noms[y])), abs(noms[y]))
1168 if (averageUncertaintySizePerBin
is None): averageUncertaintySizePerBin = relativeError
1170 if np.average(averageUncertaintySizePerBin) != 0:
1173 if np.average(relativeError) > 100 * np.average(averageUncertaintySizePerBin):
continue
1174 averageUncertaintySizePerBin = np.append(averageUncertaintySizePerBin, relativeError)
1176 return np.average(averageUncertaintySizePerBin)
1181 `wName` String (Name of the systematic weight to combine)
1182 `wInfo` dict in output format of the readDatabase.getWeight() tool.
1183 See https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/PmgSystTool
1185 `regexFilter` String [optional] (AOs whose names match regex are processed)
1186 `regexVeto` String [optional] (AOs whose names match regex are NOT processed)
1189 Produce and write a YODA file for the systematic uncertainty wName by
1190 combining the weights listed in wInfo['weights'], according to the procedure
1191 specified in wInfo['combination'] for each AO.
1193 The following values of wInfo['combination'] are supported: stat, lhapdf,
1194 replicas, alphas, envelope, hessian, and customFunction (in this case, the
1195 formula given in wInfo['function'] is evaluated).
1198 if wInfo[
'combination'] ==
'lhapdf':
1199 if (
'nominal_pdf' not in wInfo.keys())
or (len(wInfo[
'weights']) == 1):
1202 wInfo[
'combination'] =
'stat'
1204 nominalPDF =
int(wInfo[
'nominal_pdf'])
1206 pset = lhapdf.getPDFSet(pdfSetName)
1210 nominalHists =
readFromFile(wInfo[
'nominal'], regexFilter, regexVeto)
1211 variationHists_Var_AO = {}
1213 for var
in wInfo[
'weights']:
1214 if "Weight" in var:
continue
1215 if ":" in var
and 'yoda' in var:
1216 variationHists_Var_AO[var] =
readFromFile(var, regexFilter, regexVeto)
1218 variationHists_Var_AO[var] =
readFromFile(var, regexFilter, regexVeto)
1220 for var, aoDict
in variationHists_Var_AO.items():
1221 for ao, info
in aoDict.items():
1222 if ao
not in variationHists.keys():
1223 variationHists[ao] = {}
1224 variationHists[ao][var] = info
1227 outputHists = copy.deepcopy(nominalHists)
1228 for ao
in sorted(nominalHists.keys()):
1229 noms = nominalHists[ao]
1230 y, yup, ydn =
'y',
'yup',
'ydn'
1231 if 'z' in noms.keys():
1232 y, yup, ydn =
'z',
'zup',
'zdn'
1233 variations = variationHists[ao]
1234 systCentral = np.array(noms)
1235 systUp = np.array(noms)
1236 systDn = np.array(noms)
1240 if wInfo[
'type'] ==
'Nominal' or wInfo[
'combination'] ==
'stat':
1242 elif wInfo[
'combination'] ==
'lhapdf':
1244 if res
is None:
continue
1245 systCentral, systDn, systUp = res
1246 elif wInfo[
'combination'] ==
'replicas':
1248 elif wInfo[
'combination'] ==
'envelope':
1250 elif wInfo[
'combination'] ==
'alphaS':
1252 elif wInfo[
'combination'] ==
'hessian':
1254 elif wInfo[
'combination'] ==
'norm':
1256 elif wInfo[
'combination'] ==
'customFunction':
1259 print(
"[ERROR] combination type:" + wInfo[
'combination'] +
"is not yet supported... skipping this systematic uncertainty:" + wName)
1263 systCentral = (systCentral)
1265 outputHists[ao][y] = systCentral
1266 outputHists[ao][yup] = systUp
1267 outputHists[ao][ydn] = systDn
1271 print(
"[INFO] done combining this systematic: " + wName +
" using method: " +
repr(wInfo[
'combination']))
1272 if wInfo[
'combination'] ==
'customFunction':
1273 print(
"[INFO] customFunction = " +
repr(wInfo[
'function']))
1278 `ao` AnalysisObject (the one to convert to a TGraph)
1279 `isData` Bool [optional] (modify style of TGraph for data points)
1280 `setYErrorsToZero` Bool [optional](Might want to do this if you don't care about the
1281 error bands for some uncertainty component)
1282 `nominalAOForRatio` [optional]AnalysisObject or None (if not None, make a ratio plot
1283 with respect to this AnalysisObject)
1284 `return` ROOT.TGraphAsymmErrors()
1286 Fill a TGraphAsymmErrors from an AO, for plotting purposes.
1287 Can format as MC or Data. If nominalAOForRatio is specified (ie not None),
1288 the TGraphAsymmErrors is divided by the 'y' value of the nominalAOForRatio
1291 tg = r.TGraphAsymmErrors()
1292 for i
in range(len(ao[
'y'])):
1294 if (nominalAOForRatio
is not None):
1295 if not (nominalAOForRatio[
'y'][i] == 0.
or np.isnan(nominalAOForRatio[
'y'][i])): scaleBy = 1. / nominalAOForRatio[
'y'][i]
1299 tg.SetPoint(i, ao[
'x'][i], yVal * scaleBy)
1300 yErrUp = 0.
if (setYErrorsToZero
or np.isnan(ao[
'yup'][i] - ao[
'y'][i]))
else (ao[
'yup'][i] - ao[
'y'][i]) * scaleBy
1301 yErrDn = 0.
if (setYErrorsToZero
or np.isnan(ao[
'y'][i] - ao[
'ydn'][i]))
else (ao[
'y'][i] - ao[
'ydn'][i]) * scaleBy
1302 tg.SetPointError(i, ao[
'x'][i] - ao[
'xdn'][i],
1303 ao[
'xup'][i] - ao[
'x'][i],
1310 tg.SetMarkerStyle(20)
1318 `aoName` String (name to access plot info for)
1319 `pathInRivetEnv` String (.plot file where to get the plot info from)
1320 `return` dict{String, String} (the list of plotting specifications from the
1321 .plot file, as a dict)
1323 Rivet uses a separate .plot file to format plots. We want to use the same
1324 information to format our plots, and this function is a helper to retrieve
1325 that info in a more usable format from a given .plot file.
1327 f =
open(pathInRivetEnv,
'r')
1330 for line
in f.readlines():
1333 regex = line.split()[-1].
replace(
"..",
".*") +
".*"
1335 match = re.findall(regex, aoName)
1337 print(
"[WARNING] this line in .plot does not provide a valid regex..: " + line)
1339 if len(match) > 0: inBlock =
True
1344 k = line.partition(
"=")[0]
1345 v = line.partition(
"=")[-1]
1352 `unsafeLatex` String (unsafe Latex string to be converted)
1353 `return` String (safe TLatex string which can be used on ROOT plots)
1355 TLatex is not quite the same as regular latex, and won't compiled properly
1356 out of the box unless a few changes are made. This function does that
1357 hopefully in the majority of cases! No promises though... *sigh*
1359 unsafeLatex = unsafeLatex.replace(
"$",
"")
1360 unsafeLatex = unsafeLatex.replace(
"\\",
"#")
1361 unsafeLatex = unsafeLatex.replace(
"^#text",
"^")
1362 while "#text" in unsafeLatex:
1363 operand = unsafeLatex.partition(
"#text{")[-1].
partition(
"}")[0]
1364 unsafeLatex = unsafeLatex.replace(
"#text{%s}" % operand, operand)
1365 while "#mathrm" in unsafeLatex:
1366 operand = unsafeLatex.partition(
"#mathrm{")[-1].
partition(
"}")[0]
1367 unsafeLatex = unsafeLatex.replace(
"#mathrm{%s}" % operand, operand)
1368 while "#dfrac" in unsafeLatex:
1369 unsafeLatex = unsafeLatex.replace(
"}{",
" / ")
1370 right = unsafeLatex.partition(
"#dfrac{")[-1]
1372 bracketLeunsafeLatexel = 0
1374 if item ==
"{": bracketLeunsafeLatexel += 1
1376 if bracketLeunsafeLatexel == 0:
1378 bracketLeunsafeLatexel -= 1
1380 text =
"".
join(text)
1381 unsafeLatex = unsafeLatex.replace(
"#dfrac{" + text +
"}", text)
1383 missingBrackets = re.findall(
r"\^.", unsafeLatex)
1384 for mb
in missingBrackets:
1385 if "{" in mb:
continue
1386 if "#" in mb:
continue
1387 unsafeLatex = unsafeLatex.replace(mb, mb.replace(
"^",
"^{") +
"}")
1388 missingBrackets = re.findall(
r"\_.", unsafeLatex)
1389 for mb
in missingBrackets:
1390 if "{" in mb:
continue
1391 if "#" in mb:
continue
1392 unsafeLatex = unsafeLatex.replace(mb, mb.replace(
"_",
"_{") +
"}")
1393 if "#ge" in unsafeLatex
and "#geq" not in unsafeLatex:
1394 unsafeLatex = unsafeLatex.replace(
"#ge",
"#geq")
1395 unsafeLatex = unsafeLatex.replace(
"#ell",
"l")
1396 unsafeLatex = unsafeLatex.replace(
"#cos",
"cos ")
1397 unsafeLatex = unsafeLatex.replace(
"#sin",
"sin ")
1402 def makeDummyHisto(tg, isLog=False, XandYMinAndMax=None, ratioZoom=None, isRatio=False):
1404 `tg` TGraphAsymmErrors (use this to build the dummy TH1D)
1405 `isLog` Bool [optional] (Max/Min will need to be adjusted differently
1406 in case of a log plot)
1407 `XandYMinAndMax` [Float, Float, Float, Float] or None (if not None, use this factor as the low/high limits of the
1409 `ratioZoom` [Float, Float] or None (if not None, use this factor as the low/high limits of the
1411 `isRatio` Bool [optional] (Specify whether or not this is a ratio plot)
1412 `return` TH2D (with appropriate max/min on each axis)
1414 In order to control better what axis ranges to use, construct a dummy TH1D
1415 with the desired max/min on each axis to be able to show the TGraphs nicely
1422 xmin = tg.GetX()[0] - tg.GetErrorXlow(0)
1423 xmax = tg.GetX()[nBins - 1] + tg.GetErrorXhigh(nBins - 1)
1424 ymin =
min([tg.GetY()[i] - tg.GetErrorYlow(i)
for i
in range(nBins)])
1425 ymax =
max([tg.GetY()[i] + tg.GetErrorYhigh(i)
for i
in range(nBins)])
1428 if ymin < 0: ymin =
min([tg.GetY()[i]
for i
in range(nBins)])
1429 multiplierUp = ymax / ymin
if ymin > 0
else ymax
1432 ymin *= multiplierDn
1433 ymax *= multiplierUp
1435 ymax *= multiplierUp
1436 if ratioZoom
is not None:
1438 ymin = ratioZoom[0] + 0.001
1439 ymax = ratioZoom[1] - 0.001
1443 av = (abs(1 - ymin) + abs(1 - ymax)) / 2
1447 multiplierDn = 1. / 1.1
1448 ymin *= multiplierDn
1449 ymax *= multiplierUp
1450 if XandYMinAndMax
is not None:
1451 if XandYMinAndMax[0]
is not None: xmin =
float(XandYMinAndMax[0])
1452 if XandYMinAndMax[1]
is not None: xmax =
float(XandYMinAndMax[1])
1453 if XandYMinAndMax[2]
is not None: ymin =
float(XandYMinAndMax[2])
1454 if XandYMinAndMax[3]
is not None: ymax =
float(XandYMinAndMax[3])
1455 res = r.TH2D(
'dummy',
'', nBins, xmin, xmax, nBins, ymin, ymax)
1460 def makeSystematicsPlotsWithROOT(mergedSystDict, outdir, nominalName="Nominal", ratioZoom=None, regexFilter=None, regexVeto=None, label="", plotInfo=None):
1462 `mergedSystDict` dict{String, String} (format is as follows {N!label:filename},
1463 where N is the order in which to plot the variations, labels is what to put on
1464 the legend, and filename is the name of the YODA/ROOT file to use as input)
1465 `outdir` String (the output dir for the plots)
1466 `nominalName` String (which entry of mergedSystDict is the nominal? One of the
1467 entries in mergedSystDict should have this as a label)
1468 `ratioZoom` [Float, Float] or None (if not None, use this factor as the low/high limits of the
1470 `regexFilter` String [optional] (AOs whose names match regex are processed)
1471 `regexVeto` String [optional](AOs whose names match regex are NOT processed)
1472 `label` String [optional](additional label to add to the plots name)
1473 `plotInfo` String [optional](path to plot info file (will try to find it dynamically if not specified)
1474 `return` dict of output plots
1476 This is a generic macro which will make the output plots of the AOs contained
1477 in the files listed in mergedSystDict.
1480 r.gStyle.SetOptStat(00000)
1484 RIVET_ANALYSIS_PATH = os.environ.get(
"RIVET_ANALYSIS_PATH")
1485 os.system(
"mkdir -p %s" % (outdir))
1489 fileNames = [
"%s:'%s'" % (k, v)
for k, v
in sorted(mergedSystDict.items())]
1496 colorList = [r.kMagenta, r.kRed, r.kOrange, r.kYellow, r.kGreen, r.kBlue,
1497 r.kViolet, r.kPink, r.kSpring, r.kTeal, r.kCyan,
1505 nominalForRatio =
None
1506 for fn
in fileNames:
1507 if nominalName
in fn:
1508 nominalForRatio = copy.deepcopy(
readFromFile(fn.split(
":")[0]))
1509 if nominalForRatio
is None:
1510 print(
"WARNING, could not identify nominal ! exit()")
1514 for fn
in fileNames:
1516 aosThisFN =
readFromFile(fn.split(
":")[0], regexFilter, regexVeto)
1517 for aoName
in aosThisFN.keys():
1518 if "superAO" in aoName:
continue
1519 aoNameNoRef = aoName.replace(
"/REF",
"")
1520 ao = aosThisFN[aoName]
1521 if aoName
not in aos.keys():
1522 aos[aoNameNoRef] = {}
1523 aos_ratio[aoNameNoRef] = {}
1526 setYErrorsToZero =
False
1528 setYErrorsToZero =
True
1532 if aoName
in nominalForRatio.keys():
1533 nfr = nominalForRatio[aoName]
1534 elif "/REF" + aoName
in nominalForRatio.keys():
1535 nfr = nominalForRatio[
"/REF" + aoName]
1537 aos_ratio[aoNameNoRef][fn] =
arrayDictToTGraph(ao,
False, setYErrorsToZero, nfr)
1540 dataDir = os.environ[
"SYSTTOOLSPATH"] +
"/data/"
1541 rivetAnalysis = aoName.split(
"/")[1]
if len(aoName.split(
"/")) > 1
else ""
1542 pathInRivetEnv =
"%s/Rivet/%s.yoda" % (RIVET_ANALYSIS_PATH, rivetAnalysis)
1545 if fnIndex == len(fileNames) - 1
and 'Data' not in nominalName:
1548 if os.path.isfile(pathInRivetEnv):
1549 refDataPath = pathInRivetEnv
1550 elif os.path.isfile(
"%s.yoda" % rivetAnalysis):
1551 refDataPath =
"%s.yoda" % rivetAnalysis
1552 elif os.path.isfile(
"%s/%s.yoda" % (dataDir, rivetAnalysis)):
1553 refDataPath =
"%s/%s.yoda" % (dataDir, rivetAnalysis)
1559 theAOs =
readFromFile(refDataPath, regexFilter, regexVeto)
1561 if "/REF" + aoNameNoRef
in theAOs.keys()
or aoNameNoRef
in theAOs.keys():
1562 if "/REF" + aoNameNoRef
in theAOs.keys():
1563 theAO = theAOs[
"/REF" + aoNameNoRef]
1564 elif aoNameNoRef
in theAOs.keys():
1565 theAO = theAOs[aoNameNoRef]
1575 print(
"[WARNING] could not find AO with name " + aoNameNoRef +
" or " +
"REF" + aoNameNoRef +
" in REF file " + refDataPath)
1576 print(
" Perhaps due to a version mis-match where the AO names were changed... skip this data file for now")
1578 if plotInfo
is not None:
1579 plotInfoDict[aoNameNoRef] =
getPlotInfo(aoNameNoRef, plotInfo)
1580 elif os.path.isfile(pathInRivetEnv.replace(
"yoda",
"plot")):
1581 plotInfoDict[aoNameNoRef] =
getPlotInfo(aoNameNoRef, pathInRivetEnv.replace(
"yoda",
"plot"))
1582 elif os.path.isfile(
"%s.plot" % rivetAnalysis):
1583 plotInfoDict[aoNameNoRef] =
getPlotInfo(aoNameNoRef,
"%s.plot" % rivetAnalysis)
1584 elif os.path.isfile(
"%s/%s.plot" % (dataDir, rivetAnalysis)):
1585 plotInfoDict[aoNameNoRef] =
getPlotInfo(aoNameNoRef,
"%s/%s.plot" % (dataDir, rivetAnalysis))
1588 for k, v
in plotInfoDict.items():
1592 for aoName
in aos.keys():
1593 if 'RAW' in aoName:
continue
1596 for aoName
in aoNames:
1597 print(
"[INFO] processing ao %d/%d: %s " % (aoNames.index(aoName), len(aoNames), aoName))
1599 aoNameSafe = aoName.replace(
"/",
"__")
1602 nFilesToProcess = len(fileNames)
1603 leg = r.TLegend(0.17, 0.9 - 0.1 * ((2 + nFilesToProcess) / 2), 0.88, 0.80)
1611 latexes.append([11, 17, 0., 0.15, 0.905,
"#scale[1.2]{#bf{#it{ATLAS}}} Internal"])
1612 latexes.append([33, 15, 90., 0.005, 0.35,
safeRootLatex(
"Ratio to %s" % nominalName)])
1613 latexes.append([31, 15, 0., 0.9, 0.905, aoName])
1619 tc = r.TCanvas(aoNameSafe, aoNameSafe, 500, 500)
1620 pad1 = r.TPad(
"pad1",
"pad1", 0.0, 0.30, 1.0, 0.97)
1621 pad2 = r.TPad(
"pad2",
"pad2", 0.0, 0.05, 1.0, 0.35)
1622 r.SetOwnership(pad1,
False)
1623 r.SetOwnership(pad2,
False)
1624 pad1.SetLeftMargin(0.15)
1625 pad2.SetLeftMargin(0.15)
1626 pad1.SetBottomMargin(0.09)
1627 pad2.SetTopMargin(0.0)
1628 pad2.SetBottomMargin(0.15)
1637 rivetAnalysis = aoName.split(
"/")[1]
if len(aoName.split(
"/")) > 1
else ""
1638 averageErrorSize = {}
1639 for fn
in fileNames:
1643 fileNamesToProcess = []
1644 for k, v
in sorted(averageErrorSize.items(), key=
lambda x: x[1], reverse=
True):
1645 fileNamesToProcess += [k]
1647 if rivetAnalysis
in aos[aoName].
keys():
1648 fileNamesToProcess += [rivetAnalysis]
1652 for fn
in fileNamesToProcess:
1658 if ":" in fn
and '[Data]' not in fn:
1660 legendName =
"%s" % fn.split(
":")[1].
replace(
"'",
"")
1663 legendName =
"[Data]"
1665 legendName +=
" MC Stat."
1667 aos[aoName][fn].SetLineStyle(2)
1668 aos_ratio[aoName][fn].SetLineStyle(2)
1669 if (
"PDF" in fn)
and (
'Nominal' not in fn):
1671 if len(re.findall(
r'PDF\d+', fn)) > 0:
1672 pdfstring = re.findall(
r'PDF\d+', fn)[0].
replace(
"PDF",
"")
1673 if len(pdfstring) > 2:
1675 legendName +=
" " + pdfsetName.partition(
"_")[0]
1676 legendName = (legendName[:20] +
'...')
if len(legendName) > 20
else legendName
1677 if 'Total Uncertainty' in fn:
1678 aos[aoName][fn].SetFillStyle(3004)
1679 aos[aoName][fn].SetFillColorAlpha(r.kBlack, 0.5)
1680 aos[aoName][fn].SetLineColor(r.kBlack)
1681 aos_ratio[aoName][fn].SetFillStyle(3004)
1682 aos_ratio[aoName][fn].SetFillColorAlpha(r.kBlack, 0.5)
1683 aos_ratio[aoName][fn].SetLineColor(r.kBlack)
1684 elif legendName !=
"[Data]":
1685 aos[aoName][fn].SetFillColorAlpha(colorList[fnIndex], 0.5)
1686 aos[aoName][fn].SetLineColor(colorList[fnIndex])
1687 aos_ratio[aoName][fn].SetFillColorAlpha(colorList[fnIndex], 0.5)
1688 aos_ratio[aoName][fn].SetLineColor(colorList[fnIndex])
1690 aos[aoName][fn].SetMarkerSize(1)
1691 aos[aoName][fn].SetMarkerStyle(20)
1692 aos[aoName][fn].SetLineWidth(3)
1693 aos_ratio[aoName][fn].SetMarkerSize(1)
1694 aos_ratio[aoName][fn].SetMarkerStyle(20)
1695 aos_ratio[aoName][fn].SetLineWidth(3)
1700 oldRatioZoom = ratioZoom
1702 if aoName
not in plotInfoDict.keys():
1703 plotInfoDict[aoName] = {}
1704 if 'LogY' not in plotInfoDict[aoName].
keys():
1705 plotInfoDict[aoName][
'LogY'] = 0
1708 pad1.SetLogy(
int(plotInfoDict[aoName][
'LogY']))
1709 if 'LogX' not in plotInfoDict[aoName].
keys():
1710 plotInfoDict[aoName][
'LogX'] = 0
1714 pad1.SetLogx(
int(plotInfoDict[aoName][
'LogX']))
1715 pad2.SetLogx(
int(plotInfoDict[aoName][
'LogX']))
1719 XandYMinAndMax = [
None,
None,
None,
None]
1720 XandYMinAndMax_ratio = [
None,
None,
None,
None]
1722 if "XMin" in plotInfoDict[aoName].
keys():
1723 XandYMinAndMax[0] = plotInfoDict[aoName][
'XMin']
1724 XandYMinAndMax_ratio[0] = plotInfoDict[aoName][
'XMin']
1726 if "XMax" in plotInfoDict[aoName].
keys():
1727 XandYMinAndMax[1] = plotInfoDict[aoName][
'XMax']
1728 XandYMinAndMax_ratio[1] = plotInfoDict[aoName][
'XMax']
1730 if "YMin" in plotInfoDict[aoName].
keys():
1731 XandYMinAndMax[2] = plotInfoDict[aoName][
'YMin']
1732 if "YMax" in plotInfoDict[aoName].
keys():
1733 XandYMinAndMax[3] = plotInfoDict[aoName][
'YMax']
1735 if "RatioPlotYMin" in plotInfoDict[aoName].
keys():
1736 if ratioZoom
is None: ratioZoom = [0., 2.]
1737 ratioZoom[0] =
float(plotInfoDict[aoName][
'RatioPlotYMin'])
1738 if "RatioPlotYMax" in plotInfoDict[aoName].
keys():
1739 if ratioZoom
is None: ratioZoom = [0., 2.]
1740 ratioZoom[1] =
float(plotInfoDict[aoName][
'RatioPlotYMax'])
1742 dummyHisto =
makeDummyHisto(aos[aoName][fn],
int(plotInfoDict[aoName][
'LogY']), XandYMinAndMax)
1743 dummyHisto_ratio =
makeDummyHisto(aos_ratio[aoName][fn],
False, XandYMinAndMax, ratioZoom,
True)
1744 ratioZoom = oldRatioZoom
1748 dummyHisto_ratio.Draw(
"")
1749 dummyHisto.GetYaxis().SetLabelFont(43)
1750 dummyHisto.GetYaxis().SetLabelSize(15)
1751 dummyHisto.GetXaxis().SetLabelFont(43)
1752 dummyHisto.GetXaxis().SetLabelSize(0)
1753 dummyHisto_ratio.GetXaxis().SetLabelFont(43)
1754 dummyHisto_ratio.GetXaxis().SetLabelSize(15)
1755 dummyHisto_ratio.GetYaxis().SetLabelFont(43)
1756 dummyHisto_ratio.GetYaxis().SetLabelSize(15)
1759 for k, v
in plotInfoDict[aoName].
items():
1760 if k ==
'XTwosidedTicks':
1761 pad1.SetTickx(
int(v))
1762 pad2.SetTickx(
int(v))
1763 if k ==
'YTwosidedTicks':
1764 pad1.SetTicky(
int(v))
1765 pad2.SetTicky(
int(v))
1766 if k ==
'XCustomMajorTicks':
1767 for i
in range(1, dummyHisto.GetNbinsX() + 1):
1768 dummyHisto.GetXaxis().SetBinLabel(i,
"")
1769 dummyHisto_ratio.GetXaxis().SetBinLabel(i,
safeRootLatex(v.split()[2 * i - 1]))
1782 aos[aoName][fn].Draw(
"pe")
1784 aos_ratio[aoName][fn].Draw(
"pe")
1785 leg.AddEntry(aos[aoName][fn], legendName,
"pe")
1786 elif aos[aoName][fn].GetN() > 1:
1787 aos[aoName][fn].Draw(
"l3")
1789 aos_ratio[aoName][fn].Draw(
"l3")
1790 leg.AddEntry(aos[aoName][fn], legendName,
"lf")
1792 aos[aoName][fn].Draw(
"e")
1793 aos[aoName][fn].Draw(
"e2")
1795 aos_ratio[aoName][fn].Draw(
"e")
1796 aos_ratio[aoName][fn].Draw(
"e2")
1797 leg.AddEntry(aos[aoName][fn], legendName,
"lf")
1802 aos[aoName][fn].Draw(
"pe")
1804 aos_ratio[aoName][fn].Draw(
"pe")
1805 leg.AddEntry(aos[aoName][fn], legendName,
"pe")
1810 if aos[aoName][fn].GetN() > 1:
1811 aos[aoName][fn].Draw(
"l3")
1813 aos_ratio[aoName][fn].Draw(
"l3")
1815 aos[aoName][fn].Draw(
"e")
1816 aos[aoName][fn].Draw(
"e2")
1818 aos_ratio[aoName][fn].Draw(
"e")
1819 aos_ratio[aoName][fn].Draw(
"e2")
1821 if 'altPDF' in fn: legendMarker =
"l"
1822 leg.AddEntry(aos[aoName][fn], legendName, legendMarker)
1828 for latex
in latexes:
1829 lat.SetTextAlign(latex[0])
1830 lat.SetTextSize(latex[1])
1831 lat.SetTextAngle(latex[2])
1832 lat.DrawLatex(latex[3], latex[4], latex[5])
1835 tc.Print(
"%s/%s%s.pdf" % (outdir, label, aoNameSafe))
1836 tc.Print(
"%s/%s%s.png" % (outdir, label, aoNameSafe))
1837 outputPlots += [
"%s/%s%s.pdf" % (outdir, label, aoNameSafe)]
1841 def makeSystematicsPlotsWithRIVET(mergedSystDict, plotsDir, nominalName="Nominal", ratioZoom=None, regexFilter=None, regexVeto=None, label="", plotInfo=None, normalize=False):
1843 `mergedSystDict` dict{String, String} (format is as follows {N!label:filename},
1844 where N is the order in which to plot the variations, labels is what to put on
1845 the legend, and filename is the name of the YODA/ROOT file to use as input)
1848 Make some ugly plots using modified rivet make-plots.
1850 print(
"[INFO] Compare histos")
1851 fileNames = mergedSystDict.keys()
1852 print(
"[INFO] fileNames" +
repr(fileNames))
1853 colors = [
"GREEN",
"PURPLE",
"YELLOW",
"RED",
"GREY",
"BLUE",
"ORANGE"]
1856 plotText +=
'MBLUE="blue!40!white" ; '
1857 plotText +=
'MGREEN="green!40!white" ; '
1858 plotText +=
'MRED="red!40!white" ; '
1859 plotText +=
'DBLUE="blue!60!white" ; '
1860 plotText +=
'DGREEN="green!60!white" ; '
1861 plotText +=
'DRED="red!60!white" ; '
1862 plotText +=
'MYELLOW="yellow!40!white" ; '
1863 plotText +=
'DYELLOW="yellow!60!white" ; '
1864 plotText +=
'MORANGE="orange!40!white" ; '
1865 plotText +=
'DORANGE="orange!60!white" ; '
1866 plotText +=
'MGREY="black!60!white" ; '
1867 plotText +=
'DGREY="black!40!white" ; '
1868 plotText +=
'MPURPLE="purple!60!white" ; '
1869 plotText +=
'DPURPLE="purple!40!white" ; '
1871 plotText +=
'COMMON_TAGS="RatioPlotSameStyle=1:ErrorBars=0:ErrorBands=1:ErrorBandOpacity=0.3:NormalizeToIntegral=1" ; '
1872 plotText +=
" rivet-mkhtml --errs "
1874 plotText +=
'COMMON_TAGS="RatioPlotSameStyle=1:ErrorBars=0:ErrorBands=1:ErrorBandOpacity=0.3"; '
1875 plotText +=
" rivet-mkhtml --reftitle=Data --errs "
1876 for kn, lab
in mergedSystDict.items():
1877 if "Nominal" in lab:
1878 plotText +=
' %s:RatioPlotSameStyle=1:ErrorBars=1:LineColor=black:"Title=%s" ' % (kn, lab)
1879 elif "PDF_ME" in lab:
1881 plotText +=
' %s:$COMMON_TAGS:ErrorBandColor=$M%s:LineColor=%s:"Title=%s" ' % (kn, col, col.lower(), lab)
1882 elif "scale_ME" in lab:
1884 plotText +=
' %s:$COMMON_TAGS:ErrorBandColor=$D%s:LineColor=%s:"Title=%s" ' % (kn, col, col.lower(), lab)
1885 elif "alphaS" in lab:
1887 plotText +=
' %s:$COMMON_TAGS:ErrorBandColor=$D%s:LineColor=%s:"Title=%s" ' % (kn, col, col.lower(), lab)
1888 elif "altPDF" in lab:
1889 col = colors[cc % len(colors)]
1891 plotText +=
' %s:LineStyle=dashed:RatioPlotSameStyle=1:ErrorBars=0:LineColor=%s:"Title=%s" ' % (kn, col.lower(), lab)
1892 elif "Total" in lab:
1894 plotText +=
' %s:$COMMON_TAGS:ErrorBandStyle=hlines:ErrorBandColor=$D%s:LineColor=$M%s:"Title=%s" ' % (kn, col, col, lab)
1896 col = colors[cc % len(colors)]
1898 plotText +=
' %s:$COMMON_TAGS:ErrorBandStyle=hlines:ErrorBandColor=$D%s:LineColor=%s:"Title=%s" ' % (kn, col, col.lower(), lab)
1899 plotText =
'%s -o %s \n' % (plotText,
"tmp")
1901 os.system(
"mkdir -p tmp")
1902 os.system(
"mkdir -p %s" % (plotsDir))
1905 for dirname
in os.listdir(
"tmp"):
1907 dirname =
"tmp/" + dirname
1908 if os.path.isdir(dirname):
1909 for f
in os.listdir(dirname):
1910 if "pdf" not in f:
continue
1911 fpng = f.replace(
"pdf",
"png")
1912 fnew = ananame +
"__" + f
1913 fnewpng = ananame +
"__" + fpng
1914 f = dirname +
"/" + f
1915 res += [
"%s/%s" % (plotsDir, fnew)]
1916 fpng = dirname +
"/" + fpng
1917 os.system(
"convert -flatten -density 150 %s %s &> /dev/null " % (f, fpng))
1918 os.system(
"mv %s %s/%s" % (f, plotsDir, fnew))
1919 os.system(
"mv %s %s/%s" % (fpng, plotsDir, fnewpng))
1920 os.system(
"rm -r tmp")
1921 print(
"your plots are here:" + plotsDir)
1927 `systWeights` list(String) (list of weight types which are available
1928 for a given dsid, to identify the correct combination recipe)
1929 `combinationRecipeName` String [optional] (specify if you want to use a
1930 specific combination uncertainty, otherwise, it will try to auto-
1931 determine from the systWeights)
1932 `combinationRecipeFile` String [optional] (specify if the target file is in
1933 a different location to $SYSTTOOLSPATH/data/Syst_Database.yaml
1934 `return` String, dict{components, formula}
1936 Each sample type should have a unique set of weight types/names which allow us
1937 to identify in the Syst_Database.yaml what the correct combination recipe is.
1939 if len(systWeights) == 1:
1942 print(
"[WARNING] the provided systWeights only contain one component, assuming this is nominal and moving on")
1943 return {}, {
'combination': systWeights[0]}
1945 dataBaseDir = os.environ[
"SYSTTOOLSPATH"]
1946 if combinationRecipeFile
is not None:
1947 data = yaml.load(
open(combinationRecipeFile,
'r'))
1949 data = yaml.load(
open(
'%s/data/Syst_Database.yaml' % dataBaseDir,
'r'))
1950 for recipe, details
in data.items():
1951 if combinationRecipeName
is not None:
1952 if recipe == combinationRecipeName:
1953 if set(details[
'components']).issubset(
set(systWeights)):
1954 return recipe, details
1956 print(
"[Warning] requested combination recipe" + combinationRecipeName +
'uses weights ' +
repr(details[
'components']) +
'which are not all present in ' + systWeights)
1957 return recipe, details
1959 if sorted(details[
'components']) ==
sorted(systWeights):
1960 return recipe, details
1962 print(
"[WARNING] could not find systematics recipe for samples with the following ME Weights" +
repr(systWeights))
1966 def combineAllVariations(weightList, indir, outdir, regexFilter=None, regexVeto=None, combinationRecipe=None, returnOnlyVariationsInComination=True, schema="!INDIR/!INFILE.yoda:!AONAME[!WEIGHTNAME]
", orderedWeights=None, inFile=None, customWeightNameReplacements=None):
1968 `weightList` dict in output format of the readDatabase.getWeight() tool.
1969 See https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/PmgSystTool
1971 `indir` String (input dir, where to find samples for each weight (merged
1972 across jobs/DSIDs if needed)
1973 `outdir` String (output dir, where to write the output files where the
1974 individual weights are combined into systematic variations)
1975 `regexFilter` String [optional] (AOs whose names match regex are processed)
1976 `regexVeto` String [optional](AOs whose names match regex are NOT processed)
1977 `combinationRecipe` String [optional] (if you want to use a specific
1978 combination recipe, specify it here. If None, then the code will try to
1979 auto-determine the correct recipe from the $SYSTTOOLSPATH/data/Syst_Database.yaml.
1980 supports String:String where the first part is the yaml file containing the recipe)
1981 `returnOnlyVariationsInComination` Bool [optional] (by default the code will return
1982 only a list of files for variations which take part in the combination. Set to False
1983 to return all possible variations)
1984 `schema` String [optional] (tells the code how the naming convention for histograms is
1985 structured within the input file.)
1986 `schema` List[weight names] [optional] (if the `!WEIGHTINDEX!` keyword is used in `schema`
1987 this option is needed so the code can convert from weight name to index)
1988 `inFile` String [optional] (needed if the `!INFILE!` keyword is used in `schema`)
1989 `customWeightNameReplacements` OrderedDict of custom replacements to sequentially apply
1990 to weight names, or txt file with one replacement per line, separated by "-->"
1991 `return` dict{String, String} (format is as follows {N!label:filename},
1992 where N is the order in which to plot the variations, labels is what to put on
1993 the legend, and filename is the name of the YODA/ROOT file to use as input)
1995 Using the information provided by the readDatabase tool (see
1996 https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/PmgSystTool),
1997 combine individual YODA/ROOT files for each Matrix Element weights into
1998 individual YODA/ROOT files for each systematic uncertainty.
2000 os.system(
"mkdir -p %s" % outdir)
2003 weightsToProcess = []
2007 averageErrorSize = {}
2008 weightList = copy.deepcopy(weightList)
2013 for k, v
in weightList.items():
2015 isRoot =
False if 'root' not in schema
else 'True'
2016 for oldWeight
in v[
'weights']:
2018 fileName = schema.replace(
"!NSFWEIGHTNAME", oldWeight)
2019 fileName = fileName.replace(
"!WEIGHTNAME",
safeFileName(oldWeight, removeExtension=
True))
2020 fileName = fileName.replace(
"!CRWEIGHTNAME",
customReplacements(oldWeight, removeExtension=
True, customReps=customWeightNameReplacements))
2021 fileName = fileName.replace(
"!INDIR", indir)
2022 if orderedWeights
is not None: fileName = fileName.replace(
"!WEIGHTINDEX",
"%d" % orderedWeights.index(oldWeight))
2023 if inFile
is not None:
2024 fileName = fileName.replace(
"!INFILE", inFile)
2025 fileName = fileName.replace(
".root.root",
".root")
2026 fileName = fileName.replace(
".yoda.yoda",
".yoda")
2027 if(os.path.isfile(fileName.split(
":")[0])):
2028 weights += [fileName]
2029 fOut[oldWeight] = fileName
2031 print(
"[ERROR] couldn't find file of name " + fileName +
" with either .root or .yoda extension!")
2033 v[
'weights'] = weights
2034 if v[
'type'] ==
'Nominal':
2035 NominalFile = weights[0]
2036 v[
'nominal'] = NominalFile
2037 weightsToProcess += [k]
2038 elif v[
'combination'] ==
'none':
pass
2040 fileName = schema.replace(
"!NSFWEIGHTNAME", v[
'nominal'])
2041 fileName = fileName.replace(
"!WEIGHTNAME",
safeFileName(v[
'nominal'], removeExtension=
True))
2042 fileName = fileName.replace(
"!CRWEIGHTNAME",
customReplacements(oldWeight, removeExtension=
True, customReps=customWeightNameReplacements))
2043 fileName = fileName.replace(
"!INDIR", indir)
2044 if orderedWeights
is not None: fileName = fileName.replace(
"!WEIGHTINDEX",
"%d" % orderedWeights.index(v[
'nominal']))
2045 if inFile
is not None:
2046 fileName = fileName.replace(
"!INFILE", inFile)
2047 fileName = fileName.replace(
".root.root",
".root")
2048 fileName = fileName.replace(
".yoda.yoda",
".yoda")
2049 if(os.path.isfile(fileName.split(
":")[0])):
2050 v[
'nominal'] = fileName
2052 print(
"[ERROR] couldn't find file of name " + fileName +
" with either .root or .yoda extension!")
2054 weightsToProcess += [k]
2055 allWeights += weights
2058 combinationRecipeFile, combinationRecipeName =
None, combinationRecipe
2059 if combinationRecipe:
2060 if ":" in combinationRecipe:
2061 combinationRecipeFile, combinationRecipeName = combinationRecipeName.split(
":")
2062 recipe, recipeDetails =
getCombinationRecipe(weightList.keys(), combinationRecipeFile, combinationRecipeName)
2063 if recipe
is not None: formula = recipeDetails[
'combination']
2068 for syst
in weightsToProcess:
2069 if isRoot: fOut[syst] =
'%s/%s.root' % (outdir, syst)
2070 else: fOut[syst] =
'%s/%s.yoda' % (outdir, syst)
2071 labels[syst] = weightList[syst][
'type']
2073 print(
"Doing %s with %d weights: %s" % (syst, len(weightList[syst][
'weights']),
repr(weightList[syst])))
2074 combineVariation(syst, weightList[syst], fOut[syst], regexFilter, regexVeto)
2076 os.system(
"ls %s" % fOut[syst])
2081 if recipe
is not None:
2082 if 'user_defined_components' in recipeDetails.keys():
2083 for syst, details
in recipeDetails[
'user_defined_components'].
items():
2084 if isRoot: fOut[syst] =
'%s/%s.root' % (outdir, syst)
2085 else: fOut[syst] =
'%s/%s.yoda' % (outdir, syst)
2086 labels[syst] = details[
'type']
2089 combinationInfo = {}
2090 combinationInfo[
'type'] = details[
'type']
2091 combinationInfo[
'nominal'] = NominalFile
2093 combinationInfo[
'combination'] =
"customFunction"
2094 combinationInfo[
'function'] = details[
'combination']
2097 combineVariation(syst, combinationInfo, fOut[syst], regexFilter, regexVeto)
2099 os.system(
"ls %s" % fOut[syst])
2102 print(
"[INFO] combining uncertainties using the following recipe:" + recipe)
2103 print(
"[INFO] which uses formula: " + formula)
2106 combinationInfo = {}
2107 combinationInfo[
'type'] =
'All'
2108 combinationInfo[
'nominal'] = NominalFile
2109 combinationInfo[
'weights'] = fOut.values()
2110 combinationInfo[
'combination'] =
"customFunction"
2111 combinationInfo[
'function'] = formula
2114 fOut[
'all'] =
'%s/all.yoda' % outdir
2115 if isRoot: fOut[
'all'] =
'%s/all.root' % outdir
2116 else: fOut[
'all'] =
'%s/all.yoda' % outdir
2117 combineVariation(
'All', combinationInfo, fOut[
'all'], regexFilter, regexVeto)
2119 os.system(
"ls %s" % fOut[syst])
2123 result = OrderedDict()
2125 formulaComponents = averageErrorSize.keys()
2127 for k, v
in sorted(averageErrorSize.items(), key=
lambda x: x[1], reverse=
True):
2129 result[fOut[k]] =
"[Total Uncertainty]"
2132 for component
in formulaComponents:
2133 if component == k: isComponent =
True
2134 if (
not isComponent)
and returnOnlyVariationsInComination:
continue
2135 result[fOut[k]] =
'[%s]' % labels[k]
2143 `fulldir` String (directory containing tarballs to unpack)
2144 `force` Bool [optional] (re-do untarring even if already done)
2145 `verbose` Bool [optional] (Print more output if set to True)
2146 `rootOrYoda` String [optional] (specify file type of tarred objects)
2148 `return` list of [sample, newfn, filepath]
2150 This function goes through a directory and unpacks all the tarballs it finds.
2151 By default if a matching unpacked file has been found,
2152 a tarball is not needlessly re-unpacked unless option `force` is used.
2155 if fulldir[-1] ==
"/": fulldir = fulldir[:-1]
2156 sample = fulldir.split(
"/")[-1]
2158 os.system(
"cd %s" % fulldir)
2159 print(
"\r[INFO] Processing " + sample)
2160 nSubSamples = len([name
for name
in os.listdir(fulldir)])
2163 os.system(
"rm %s/merged/*" % fulldir)
2164 os.system(
"rm %s/merged_tmp/*" % fulldir)
2165 if len(os.listdir(fulldir)) == 0:
2166 print(
"[ERROR] can't run over empty directory: " + fulldir)
2168 for subsample
in os.listdir(fulldir):
2169 if 'root' in subsample:
2170 newfulldir = fulldir +
"/" + subsample.replace(
".root.tgz",
".untarred")
2172 newfulldir = fulldir +
"/" + subsample.replace(
".yoda.tgz",
".untarred").
replace(
".yoda.gz.tgz",
".untarred")
2173 if (
not subsample.endswith(
'gz')
and not notTGZ):
continue
2174 if (
"untarred" in subsample
and not force):
continue
2175 if (
"merged" in subsample
and not force):
continue
2178 if (
'tgz' in subsample)
and not (os.path.isdir(newfulldir)
and not notTGZ):
2179 os.system(
"mkdir -p %s" % newfulldir)
2180 if verbose: sys.stdout.write(
'\r Unpacking job output %s (%d/%d) ' % (subsample, counter, nSubSamples))
2182 with tarfile.TarFile.open(fulldir +
"/" + subsample,
'r:gz')
as tar:
2183 fnames = tar.getnames()
2184 prefix = os.path.commonprefix(fnames)
2185 if len(fnames) == 1: prefix =
""
2187 tar.extractall(newfulldir)
2189 newfn = fn.replace(prefix,
"")
2190 if ".yoda.gz" not in fn:
2191 os.rename(newfulldir +
"/" + fn, newfulldir +
"/" +
safeFileName(newfn) +
"." + rootOrYoda)
2192 res.append([fulldir, newfn, newfulldir +
"/" +
safeFileName(newfn, removeExtension=
False)])
2194 for fn
in os.listdir(newfulldir):
2195 res.append([fulldir, fn, newfulldir +
"/" +
safeFileName(fn, removeExtension=
False)])
2197 fn = os.path.basename(newfulldir)
2198 newfulldir = os.path.dirname(newfulldir)
2199 os.system(
"mkdir -p %s/%s" % (newfulldir,
safeFileName(fn, removeExtension=
True)))
2200 renamedfn =
'_Nominal.yoda'
2201 os.system(
"cp %s/%s %s/%s/%s" % (newfulldir, fn, newfulldir,
safeFileName(fn, removeExtension=
True), renamedfn))
2202 res.append([fulldir, renamedfn, newfulldir +
"/" +
safeFileName(fn, removeExtension=
True) +
"/" + renamedfn])
2203 print(
'\r[INFO] Done untar-ing %s' % sample)
2209 `progress` dict (a dictionary of the job labels:status)
2210 `key` String (the job label to update)
2211 `message` String (the status to update)
2214 This is a helper function to update and print a progress update
2215 when multi-threading
2217 progress[key] = message
2223 `progress` dict (a dictionary of the job labels:status)
2226 This is a helper function to print the progress update when multi-threading
2234 printString =
"--> Running with %d threads: " % progress[
'N']
2235 for k, v
in sorted(progress.items(), key=
lambda x: x[0]):
2236 if v ==
'done': done += 1
2237 if v ==
'pend': pend += 1
2238 if v ==
'run': run += 1
2239 printString +=
" DONE =%d, RUN =%d, PENDING =%d " % (done, run, pend)
2240 if len(printString) > nChar:
2241 printString = printString[:nChar] +
"..."
2242 sys.stdout.write(
'\r %s' % printString)
2246 def mergeInChunks(outfn, paths, progressDict=None, nFilesPerChunk=100, force=False, rootOrYoda='yoda'):
# NOTE(review): this listing is a garbled extraction -- original source lines
# 2247, 2255-2256, 2259, 2265, 2270, 2275, 2277-2278, 2280, 2283, 2290, 2293 and
# 2298-2300 (docstring fences/blank lines, the `else:` introducing the yoda
# branch, progress updates, the early-out on an existing merged file, and the
# `filesToMerge` initialisation) are missing here; confirm against the full
# source before editing.
2248 `outfn` String (The name of the output Yoda file you wish to produce, with full path)
2249 `paths` list[String] (List of Yoda files to merge, with full paths)
2250 `progressDict` dict [optional] (dictionary of job labels:status, used to report merge progress)
2251 `nFilesPerChunk` int [optional] (How many files to do per chunk)
2252 `force` Bool [optional] by default, if there is already a matching merged file, this function does nothing
2253 but `force` forces the merge again
2254 `rootOrYoda` String [optional] (specify file type of objects to merge)
2257 This function safely merges multiple yoda files in chunks of nFilesPerChunk at a time (if too many, yodamerge can fail!)
2258 Recommended is nFilesPerChunk = 100, but definitely less than 300.
# Create the output directory and a tmp/ area for the intermediate chunk merges.
2260 fulldir = os.path.dirname(os.path.abspath(outfn))
2261 os.system(
"mkdir -p %s" % fulldir)
2262 os.system(
"mkdir -p %s/tmp" % fulldir)
2263 fnShort = os.path.basename(outfn)
# Default merge tool is yodamerge; switched to ROOT's `hadd -f` below for root files.
2264 mergeTool =
"yodamerge_tmp.py -o "
2266 if 'root' == rootOrYoda:
# Sanity-check that the requested file type matches the output file name.
2267 assert(
'root' in fnShort
and 'yoda' not in fnShort)
2268 fnShort = fnShort.partition(
".root")[0]
2269 mergeTool =
"hadd -f "
# (yoda branch -- its introducing `else:` at original line 2270 is missing from this listing)
2271 assert(
'root' not in fnShort
and 'yoda' in fnShort)
2272 fnShort = fnShort.partition(
".yoda")[0]
# Keep any ".gz" suffix so compressed yoda outputs stay compressed.
2273 gzsuffix = fnShort.partition(
".yoda")[2]
2274 if progressDict
is not None:
# (progress update at original line 2275 missing from this listing; presumably
# marks this job as 'run' -- TODO confirm)
2276 if os.path.isfile(outfn)
and not force:
# (original lines 2277-2278 missing -- presumably mark as done and return early
# so an existing merged file is not redone; TODO confirm)
# Random stagger so many merge threads do not hit the filesystem simultaneously.
2279 time.sleep(0.1 * randint(1, 50))
2281 if len(paths) > nFilesPerChunk:
# Too many inputs for one merge call: merge nFilesPerChunk at a time into
# tmp/ chunk files, then merge the chunk files in the final call below.
2282 for iChunk
in range(0, len(paths), nFilesPerChunk):
2284 chunkPaths = paths[iChunk:iChunk + nFilesPerChunk]
2285 chunkFilesToMerge =
' '.
join(chunkPaths)
2286 chunkMergedFile =
"%s/tmp/%s.chunk%d.%s" % (fulldir, fnShort, iChunk, rootOrYoda + gzsuffix)
2287 os.system(
"%s %s %s &> out1.%d.log " % (mergeTool, chunkMergedFile, chunkFilesToMerge, iChunk))
2288 print(
"%s %s %s &> out1.%d.log " % (mergeTool, chunkMergedFile, chunkFilesToMerge, iChunk))
2289 filesToMerge += chunkMergedFile +
" "
# (else branch: few enough files to merge in a single call)
2291 filesToMerge =
' '.
join(paths)
2292 if 'yoda' == rootOrYoda:
# Strip '|' characters which can break yodamerge parsing. The loop binding of
# `p` is on a line missing from this listing; presumably iterates the input
# paths -- TODO confirm. Gzipped yoda files are skipped (cannot sed in place).
2294 if ".yoda.gz" not in p:
2295 os.system(
"sed -i 's/|//g' %s" % p)
# Final merge of all (chunk) files into the requested output file.
2296 os.system(
"%s %s %s &> out2.log " % (mergeTool, outfn, filesToMerge))
# (body of this progress update, original lines 2298-2300, missing from this listing)
2297 if progressDict
is not None:
2301 def getCrossSectionCorrection(xsList, systFiles, nomFiles, rivetNormalised=False, applyKFactorCorrection=False, varWeightName="", nominalWeightName="", sumwHistName=""):
# NOTE(review): garbled extraction -- original lines 2327 (presumably `res = []`),
# 2334-2336 and 2342-2345 (presumably the per-index bindings of xs/syst/nom and
# the initial value of `weight`), and the trailing `return res` are missing from
# this listing; confirm against the full source before editing.
2303 `xsList` list[Float] (list of nominal XS values for the subsamples)
2304 `systFiles` list[String] (list of file names for the variated subsamples
2305 (same order as xsList)
2306 `nomFiles` list[String] (list of file names for the nominal subsamples
2307 `rivetNormalised` Bool [optional] (whether or not to apply the total yield
2308 correction for variation AOs, since by default rivet normalises by sum-of-weights
2309 for that variation rather than sum-of-weight of the nominal)
2310 `applyKFactorCorrection` Bool [optional] (If the sample to merge has a k-factor, there
2311 is an additional correction needed to avoid double counting the normalisation uncertainty)
2312 `varWeightName` String [optional] (name of the variation to get weight correction for)
2313 `nominalWeightName` String [optional] (name of the nominal variation to get weight correction with respect to)
2314 (same order as xsList)
2315 `sumwHistName` String [optional] (name of the TH1 with sum-of-weights in the root file output scenario)
2317 This function takes a list of nominal and variated YODA/ROOT files, and a list of
2318 nominal cross-sections in the same order, to calculate the on-the-fly cross-
2319 section corrections needed for each systFile. For yoda files produced in the Rivet 2.x
2320 series, a correction is needed to undo the fact that Rivet normalises systematics
2321 by the varied sum of weights instead of the nominal sum of weights.
2322 If using a k-factor, an additional correction is needed to avoid double-counting
2323 the normalisation uncertainty. For details, see
2324 https://twiki.cern.ch/twiki/bin/view/AtlasProtected/PmgSystematicUncertaintyRecipes#On_the_fly_systematic_variations
# First pass: accumulate the nominal inclusive cross-section and the
# (correction-weighted) varied inclusive cross-section over all subsamples.
2328 inclusive_xs_nom = 0.
2329 inclusive_xs_syst = 0.
# The three input lists must line up index-by-index.
2330 assert len(xsList) == len(nomFiles)
2331 assert len(xsList) == len(systFiles)
2333 for i
in range(len(xsList)):
# (per-index bindings of xs/syst/nom at original lines 2334-2336 missing from
# this listing -- presumably xs = xsList[i], syst = systFiles[i], nom = nomFiles[i])
2337 inclusive_xs_nom += xs
# For Rivet-normalised outputs, undo the per-variation sum-of-weights
# normalisation via weightCorrection(); otherwise the varied XS equals nominal.
2338 if rivetNormalised: inclusive_xs_syst += xs *
weightCorrection(syst, nom, varWeightName=varWeightName, nominalWeightName=nominalWeightName, sumwHistName=sumwHistName)
2339 else: inclusive_xs_syst += xs * 1.
# Second pass: build the per-file multiplicative correction weight for each
# systFile (initial value of `weight` is on a missing line -- TODO confirm).
2341 for i
in range(len(xsList)):
2346 if rivetNormalised: weight *=
weightCorrection(syst, nom, varWeightName=varWeightName, nominalWeightName=nominalWeightName, sumwHistName=sumwHistName)
# k-factor samples: rescale by nominal/varied inclusive XS so the normalisation
# uncertainty is not double-counted.
2347 if applyKFactorCorrection: weight *= (inclusive_xs_nom / inclusive_xs_syst)
2348 res.append([syst, weight])
2355 This module can also be run as a standalone executable.
2356 For info about the options try:
2357 systematicsTool.py -h
2359 This is useful for unpacking large GRID job outputs, as it will do all the
2360 book-keeping, untarring of tarballs, merging across jobs, across subsamples,
2361 across samples weighted by DSID, and then finally processing the systematics
2362 and plotting them. Multithreading available.
2364 parser = optparse.OptionParser(usage=
"%prog [options]")
2365 parser.add_option(
"-d",
"--directory", help=
"Directory containing the rucio downloads.", dest=
"directory", default=
"samples")
2366 parser.add_option(
"-p",
"--process", help=
"Name of process to merge. Will only apply merge to directories containing this name. Also accepts the format regex:label, where the label is used to name the output directory, and the regex to select which directories to merge. Accepts comma separated list of regex:label pairs", dest=
"process", default=
"Zee")
2367 parser.add_option(
"-g",
"--generator", help=
"Name of generator to merge. Will only apply merge to directories containing this name. Also accepts the format regex:label, where the label is used to name the output directory, and the regex to select which directories to merge. Accepts comma separated list of regex:label pairs", dest=
"generator", default=
"Sherpa")
2368 parser.add_option(
"--force", help=
"By default, if a subsample has already been untarred and merged, it is not done again. This option forces the script to re-untar and re-merge. Slow, unless you use a lot of threads...", dest=
"force", default=
False)
2369 parser.add_option(
"--plotsDir", help=
"Where to save the plots to? Default is <generator>_<process>_<label>/plots", dest=
"plotsDir", default=
None)
2370 parser.add_option(
"-l",
"--label", help=
"An additional label for the output directory of the merge and as a prefix to the plots", dest=
"label", default=
"")
2371 parser.add_option(
"-f",
"--filter", help=
"Only process AOs matching this regex", dest=
"filter", default=
".*")
2372 parser.add_option(
"-e",
"--exclude", help=
"Do NOT process AOs matching this regex", dest=
"exclude", default=
None)
2373 parser.add_option(
"-j",
"--nThreads", help=
"Multithread some of the merging steps", dest=
"nThreads", default=4)
2374 parser.add_option(
"--noSyst", help=
"Ignore systematics, only merge/process/draw the nominal weight", dest=
"noSyst", default=
False, action=
"store_true")
2375 parser.add_option(
"--notTGZ", help=
"Use if your input files were just yoda/root files not tarballs", dest=
"notTGZ", default=
False, action=
"store_true")
2376 parser.add_option(
"--uxs",
"--userXS", help=
"When looking up cross-sections for each DSID, check the specified file first before defaulting to CVMFS values", dest=
"userXS", default=
None)
2377 parser.add_option(
"--nw",
"--nominalWeight", help=
"Bypass the DSID_weights database, and use the specified name as the nominal. Only for us with --noSyst", dest=
"nominalWeight", default=
"")
2378 parser.add_option(
"--sp",
"--skipPlots", help=
"Do the merging of files, but just skip the plotting at the end", dest=
"skipPlots", default=
False, action=
"store_true")
2379 parser.add_option(
"--normalize", help=
"Normalize any output plots to unit integral", dest=
"normalize", default=
False, action=
"store_true")
2380 parser.add_option(
"--r2n",
"--rivet2Normalised", help=
"Are your histograms normalised by the sum of weights in each variation, as is done typically for Rivet2 analyses? specify 1/0 for True/False", dest=
"rivetNormalised", default=
False)
2381 parser.add_option(
"--sw",
"--sumwHistName", help=
"Name of the TH1 that stores the sum-of-weights in the case of root file", dest=
"sumwHistName", default=
"__EVTCOUNT")
2382 parser.add_option(
"--cr",
"--customReplacements", help=
"A .txt file specifying which replacements to apply to weight names, one per line, in the format `<old string>--><new string>` (without quotes), eg ` -->` to replace spaces with nothing, and `.-->_` to replace dots with underscores. One replacement per line which will be applied sequentially in the order listed in the file.", dest=
"customReplacements", default=
None)
2383 parser.add_option(
"--schema", help=
"Tells the tool how to access the analysis objects from the inputs provided. Format is PathToFile:ObjectName. For example, if you have one YODA file per MC Weight, and each histogram is named the same in each input file, you schema would be !INDIR/!WEIGHTNAME.yoda:!AONAME. If you instead store in a single root file all the variations, where the weight name is appended to the histo name, your schema would be !INDIR/!INFILE.yoda:!AONAME[!WEIGHTNAME]. The available identifiers are: !INDIR (path to input dir), !INFILE (name of input file), !AONAME (analysis object name), !WEIGHTNAME (the MC weight name, assuming that safeFileName was applied to changes spaces to underscores etc...), !NSFWEIGHTNAME (assuming no weight name replacements at all), or !CRWEIGHTNAME (using custom repalcements, in this case please use option --customReplacements or --cr to specify them)", dest=
"schema", default=
'!INDIR/!INFILE.yoda:!AONAME[!WEIGHTNAME]')
2384 parser.add_option(
"--plotInfo", help=
"A rivet-style .plot file to tell the tool how to format your output plots. See data/ExclusivePlusInclusive.plot as an example. Also works for plots make from root files!", dest=
"plotInfo", default=
None)
2385 (opts, args) = parser.parse_args()
2387 r.gErrorIgnoreLevel = r. kError
2390 for process
in opts.process.split(
","):
2392 tokens = process.split(
":")
2393 processRegex = tokens[0]
2396 processRegex = process
2400 for generator
in opts.generator.split(
","):
2401 if ":" in generator:
2402 tokens = generator.split(
":")
2403 generatorRegex = tokens[0]
2404 generator = tokens[1]
2406 generatorRegex = generator
2408 if opts.rivetNormalised
is not None:
2409 opts.rivetNormalised =
bool(
int(opts.rivetNormalised))
2412 if opts.rivetNormalised
is True:
2413 if "--schema" not in sys.argv:
2414 opts.schema =
"!INDIR/!WEIGHTNAME.yoda:!AONAME"
2416 rootOrYoda =
'root' if '.root' in opts.schema
else 'yoda'
2417 if "WEIGHT" in opts.schema.split(
":")[0]:
2418 structure =
"OneFilePerVariation"
2420 structure =
"AllVariationsInFile"
2423 thisFilter =
'.*%s.*%s.*' % (generatorRegex, processRegex)
2426 if opts.plotsDir
is None:
2427 opts.plotsDir =
"%s_%s_%s/plots" % (process, generator, opts.label)
2428 os.system(
"mkdir -p %s" % opts.plotsDir)
2432 jobsMergedFiles = {}
2433 print(
"================================================================================")
2434 print(
"[INFO] Processing files which match this regex " + thisFilter)
2435 print(
"================================================================================")
2440 print(
"-----------------------------------------------------------------------")
2441 print(
"[INFO] Untar Grid outputs ")
2442 print(
"------------------------------------------------------------------------")
2445 pool = ThreadPool(
int(opts.nThreads))
2449 samplesToProcess = []
2450 workingDir = opts.directory
2451 if '/afs/' not in workingDir: workingDir = pwd +
"/" + opts.directory
2453 for sample
in os.listdir(workingDir):
2455 fulldir = workingDir +
"/" + sample
2456 if not re.search(thisFilter, sample):
continue
2457 if not re.search(
'user.', sample):
continue
2459 if not os.path.isfile(fulldir):
2460 samplesToProcess += [fulldir]
2461 for fulldir
in samplesToProcess:
2462 threadInputs.append([fulldir, opts.force])
2463 untarredFiles[fulldir] = {}
2464 if len(threadInputs) == 0:
2465 print(
"[ERROR] no directories in %s matches the requested regex! exit !" % workingDir)
2468 def threadJobFunction(inputs):
2471 results = pool.map(threadJobFunction, threadInputs)
2474 for jobResult
in results:
2475 for res
in jobResult:
2476 if structure ==
"OneFilePerVariation":
2477 untarredFiles[res[0]].setdefault(res[1], []).
append(res[2])
2479 untarredFiles[res[0]].setdefault(
'AllVariations', []).
append(res[2])
2483 print(
"------------------------------------------------------------------------")
2484 print(
"[INFO] Merging across jobs for each DSID")
2485 print(
"------------------------------------------------------------------------")
2487 for fulldir
in sorted(untarredFiles.keys()):
2488 print(
"[INFO] Merging files for jobs of " + os.path.basename(fulldir))
2490 infiles = untarredFiles[fulldir]
2491 nFileTypes = len(infiles.keys())
2493 print(
"[ERROR] 0 file types found in inputs! Exit!")
2494 print(untarredFiles)
2500 def threadJobFunction(inputs):
2501 mergeInChunks(inputs[0], inputs[1], progressDict=inputs[2], rootOrYoda=rootOrYoda)
2504 pool = ThreadPool(
int(opts.nThreads))
2508 progressDict[
'N'] =
int(opts.nThreads)
2509 for fn, paths
in infiles.items():
2512 progressDict[basename] =
'pend'
2513 outfn =
"%s/merged/%s.%s" % (fulldir, basename, rootOrYoda)
2514 sampleName = os.path.basename(fulldir)
2515 threadInputs.append([outfn, paths, progressDict])
2516 jobsMergedFiles.setdefault(basename, {})[sampleName] = outfn
2517 results = pool.map(threadJobFunction, threadInputs)
2520 print(
'\r[INFO] Done Merging %s' % os.path.basename(fulldir))
2524 print(
"------------------------------------------------------------------------")
2525 print(
"[INFO] Get available MEweights from DSID database and make sure they are consistent!")
2526 print(
"------------------------------------------------------------------------")
2530 def threadJobFunction(inputs):
2534 dsid = re.findall(
r"\.[0-9]{6,6}\.", fulldir)[0].
replace(
".",
"")
2535 print(
"\r[INFO] Looking up XS and MEWeights for DSID = " +
repr(dsid))
2536 if rootOrYoda ==
'yoda' and getXSFromYODA(jobsMergedFiles.values()[0][os.path.basename(fulldir)]) != 1.0:
2537 print(
"\r[INFO] Histograms in YODA file of DSID =%s are already scaled by xsec. Set xsec to 1.0 to avoid double scaling!" % dsid)
2538 xsDict[dsid] = 1.0, 0, 0
2540 xsDict[dsid] =
getXS(
int(dsid), userFile=opts.userXS)
2541 availableWeights =
None
2542 nominalWeight =
None
2544 if availableWeights
is None:
2545 availableWeights = rDB.getWeights(dsid)[0]
2546 for k, v
in availableWeights.items():
2547 if v[
'type'] ==
'Nominal':
2548 nominalWeight = v[
'nominal']
2551 nominalWeight = jobsMergedFiles.keys()[0]
2552 if len(opts.nominalWeight) > 0: nominalWeight = opts.nominalWeight
2553 availableWeights = {
'_Nominal':
2554 {
'nominal_pdf':
'-1',
'type':
'Nominal',
'nominal':
'%s' % nominalWeight,
2555 'weights': [
'%s' % nominalWeight],
'combination':
'none'
2558 if xsDict[dsid][2] > 0:
2559 availableWeights[
"Normalization"] = {
'nominal_pdf':
'-1',
'type':
'Normalization',
'nominal': nominalWeight,
'weights': [nominalWeight],
'combination':
'norm',
"value": xsDict[dsid][2]}
2561 return [availableWeights, dsid]
2563 pool = ThreadPool(
int(opts.nThreads))
2565 for fulldir
in sorted(untarredFiles.keys()):
2566 threadInputs.append([fulldir, opts.noSyst, xsDict])
2568 results = pool.map(threadJobFunction, threadInputs)
2573 print(
"[INFO] Consistentcy Checks:")
2574 availableWeights =
None
2576 for dsid, xs
in xsDict.items():
2577 if (xsDict[dsid][0] == -1):
2578 print(
"\r[INFO] ERROR: could not find Xs for DSID =%s. Exit." % (
str(dsid)))
2581 print(
"\r[INFO] DSID =%s has XS =%.6f pb (kFactor =%.2f, normUnvertainty =%.2f)" % (
str(dsid), xsDict[dsid][0], xsDict[dsid][1], xsDict[dsid][2]) +
" and found ME weights")
2583 if availableWeights
is None:
2584 availableWeights = res[0]
2585 print(
"[INFO] DSID = " + res[1] +
" has weights" +
repr(availableWeights.keys()))
2588 print(
"[INFO] DSID = " + res[1] +
" has weights consistent with the above")
2590 print(
"[ERROR] DSID = " + res[1] +
" has weights NOT consistent with the above!!")
2593 for k, v
in availableWeights.items():
2596 for w
in v[
'weights']:
2597 if w
in uniqueWeights:
continue
2598 else: uniqueWeights += [w]
2600 if (len(uniqueWeights) == 0
and not opts.noSyst):
2601 print(
"[ERROR] Your sample DISD = " + res[1] +
" has no weights... so it might be missing from the database!")
2602 print(
"[ERROR] Please contact lcorpe@cern.ch to get your sample added...")
2603 print(
"[ERROR] In the meantime, we will exit since no combination recipe is availale... you can still run with --noSyst")
2608 print(
"------------------------------------------------------------------------")
2609 print(
"[INFO] Calculating on the fly corrections to XSs for each systematic")
2610 print(
"------------------------------------------------------------------------")
2612 if opts.rivetNormalised
is None:
2613 if rootOrYoda ==
'yoda': opts.rivetNormalised =
True
2614 else: opts.rivetNormalised =
True
2616 allFilesToMerge = {}
2618 def threadJobFunction(inputs):
2620 progressDict = inputs[4]
2624 return systName, res
2626 pool = ThreadPool(
int(opts.nThreads))
2630 applyKFactorCorrection =
None
2631 progressDict[
'N'] =
int(opts.nThreads)
2632 for syst
in uniqueWeights:
2633 if structure ==
"AllVariationsInFile":
2634 sample = jobsMergedFiles.values()[0]
2637 systSampleFiles = []
2640 for sampleName, path
in sample.items():
2641 dsid = re.findall(
"[0-9]{6,6}", sampleName)[0].
replace(
".",
"")
2642 xsList += [xsDict[dsid][0]]
2643 if applyKFactorCorrection
is None: applyKFactorCorrection = xsDict[dsid][1]
2644 elif applyKFactorCorrection != xsDict[dsid][1]:
2645 print(
"[ERROR], cannot merge samples with inconsistent k-factors !")
2649 systSampleFiles += [path]
2650 if structure ==
"AllVariationsInFile":
2651 nomSampleFiles += [jobsMergedFiles.values()[0][sampleName]]
2653 nomSampleFiles += [jobsMergedFiles[
safeFileName(nominalWeight[
'nominal'])][sampleName]]
2655 if structure ==
"AllVariationsInFile":
2656 threadInputs += [[syst, xsList, systSampleFiles, nomSampleFiles, progressDict, opts.rivetNormalised, applyKFactorCorrection,
safeFileName(syst),
safeFileName(nominalWeight[
'nominal'])]]
2658 threadInputs += [[syst, xsList, systSampleFiles, nomSampleFiles, progressDict, opts.rivetNormalised, applyKFactorCorrection,
"",
""]]
2659 progressDict[syst] =
'pend'
2661 results = pool.map(threadJobFunction, threadInputs)
2665 for jobResult
in results:
2667 for systFile, weight
in jobResult[1]:
2668 allFilesToMerge.setdefault(syst, []).
append(
" %s:%f" % (systFile, weight))
2671 print(
"------------------------------------------------------------------------")
2672 print(
"[INFO] Merging files across DSIDs for each systematic")
2673 print(
"------------------------------------------------------------------------")
2675 def threadJobFunction(inputs):
2678 generator = inputs[1]
2681 os.system(
"mkdir -p %s_%s_%s/merged_sample" % (process, generator, opts.label))
2682 if rootOrYoda ==
'root':
2684 if structure ==
"AllVariationsInFile":
2685 mergedFile =
"%s_%s_%s/merged_sample/%s.%s" % (process, generator, opts.label,
safeFileName(s), rootOrYoda)
2688 command =
"haddw -f %s -r %s %s %s &> out.txt " % (thisFilter, thisFindAndReplace, mergedFile,
" ".
join(allFilesToMerge[
safeFileName(s)]))
2689 if debug:
print(command)
2692 if structure ==
"OneFilePerVariation":
2693 mergedFile =
"%s_%s_%s/merged_sample/%s.%s" % (process, generator, opts.label,
safeFileName(s), rootOrYoda)
2694 command =
"haddw %s %s &> out.txt " % (mergedFile,
" ".
join(allFilesToMerge[s]))
2697 mergedFile =
"%s_%s_%s/merged_sample/%s.%s" % (process, generator, opts.label,
safeFileName(s), rootOrYoda)
2698 if structure ==
"AllVariationsInFile":
2708 command =
"yodascale -c '.* %fx' -i %s &> out.txt \n" % (
float(sampleToMerge[1]), mergedFile)
2709 if debug:
print(command)
2713 command =
r"yodamerge_tmp.py --add -o %s %s -M .*\\.*\\.* &> out.txt " % (mergedFile,
" ".
join(allFilesToMerge[
safeFileName(s)]))
2714 if debug:
print(command)
2718 command =
"yodascale -c '.* %fx' -i %s &> out.txt \n" % (
float(sampleToMerge[1]), mergedFile)
2719 if debug:
print(command)
2721 if structure ==
"OneFilePerVariation":
2722 if len(allFilesToMerge[s]) == 1:
2723 sampleToMerge = allFilesToMerge[s][0].
split(
":")
2724 command =
"cp %s %s ; yodascale -c '.* %fx' -i %s &> out.txt \n" % (sampleToMerge[0], mergedFile,
float(sampleToMerge[1]), mergedFile)
2726 command =
"yodamerge_tmp.py --add -o %s %s &> out.txt " % (mergedFile,
" ".
join(allFilesToMerge[s]))
2727 if debug:
print(command)
2732 if rootOrYoda ==
'root': nThreads = 1
2733 else: nThreads =
int(opts.nThreads)
2734 pool = ThreadPool(nThreads)
2737 progressDict[
'N'] = nThreads
2738 if (structure ==
"AllVariationsInFile"):
2739 for i, s
in enumerate(uniqueWeights):
2740 progressDict[s] =
'pend'
2741 threadInputs.append([s, generator, process, progressDict])
2742 elif (structure ==
"OneFilePerVariation"):
2743 for i, s
in enumerate(allFilesToMerge.keys()):
2744 progressDict[s] =
'pend'
2745 threadInputs.append([s, generator, process, progressDict])
2747 results = pool.map(threadJobFunction, threadInputs)
2753 print(
"------------------------------------------------------------------------")
2754 print(
"[INFO] Combining variations into named sources of uncertainty")
2755 print(
"------------------------------------------------------------------------")
2757 mergedSystDict =
combineAllVariations(availableWeights,
"%s_%s_%s/merged_sample" % (process, generator, opts.label),
"%s_%s_%s/merged_final/." % (process, generator, opts.label), opts.filter, opts.exclude, schema=
"!INDIR/!WEIGHTNAME.%s:!AONAME" % rootOrYoda, customWeightNameReplacements=opts.customReplacements)
2760 print(
"-----------------------------------------------")
2761 print(
"[INFO] Produce plots of the systematic uncertainties (with bands)")
2762 print(
"-----------------------------------------------")
2764 if not opts.skipPlots
and rootOrYoda ==
"yoda":
2765 plots =
makeSystematicsPlotsWithRIVET(mergedSystDict, opts.plotsDir,
"Nominal", label=opts.label, plotInfo=opts.plotInfo, normalize=opts.normalize)
2767 print(
"[INFO] printed: " + p)
2770 if __name__ ==
"__main__":