ATLAS Offline Software
buildDatabase.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
3 
4 import os
5 import sys
6 import yaml
7 import argparse
8 import re
9 # Author: Abhishek Nag (TU Dresden)
10 # email: abhishek.nag@cern.ch
11 
# Directory named by the SYSTTOOLSPATH environment variable. The YAML
# databases read and written by this module live under "<SystToolsPath>/data".
SystToolsPath = os.environ.get('SYSTTOOLSPATH')
if SystToolsPath is None:
    print("[ERROR] Environment variable SYSTTOOLSPATH is not set. It should be set to the systematics-tools directory. Use setupSystematicsToolR21.sh")
    # sys.exit() instead of the bare exit() helper: exit() is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)
# PDF-set IDs, stored as strings. nom(), makePowhegDatabase() and
# makeSherpaDatabase() look up "PDF id minus (id % 100)" in this list to decide
# whether that difference is the ID a member variation belongs to.
# NOTE(review): presumably the LHAPDF central-member IDs -- confirm the origin
# of the list before extending it.
pdfsets = ['251', '252', '10000', '10042', '10550', '10770', '10771', '10772', '10800', '10860', '10900', '10960', '10980', '10981', '10982', '10983', '11000', '11062', '11063', '11064', '11065', '11066', '11067', '11068', '11069', '11070', '11071', '11072', '11073', '11074', '11075', '11076', '11077', '11080', '11082', '11100', '11162', '11163', '11164', '11165', '11166', '11167', '11168', '11169', '11170', '11171', '11172', '11173', '11174', '11175', '11176', '11177', '11180', '11182', '11200', '11260', '11261', '11262', '11263', '11264', '11265', '11266', '11267', '11268', '11269', '11270', '11271', '11272', '11273', '11274', '11275', '11276', '11277', '11278', '11279', '11280', '12000', '12100', '12200', '12300', '12400', '13000', '13060', '13061', '13062', '13063', '13064', '13065', '13066', '13067', '13068', '13069', '13070', '13071', '13072', '13081', '13090', '13091', '13092', '13100', '13158', '13159', '13160', '13161', '13162', '13163', '13164', '13165', '13166', '13167', '13168', '13169', '13170', '13190', '13191', '13192', '13200', '13201', '13202', '13203', '13205', '13206', '13207', '13208', '13300', '13350', '13400', '13450', '20460', '20463', '20465', '20650', '20651', '21000', '21050', '21100', '21150', '21200', '21250', '22000', '22100', '22150', '22200', '22250', '22300', '22350', '22400', '22450', '22500', '22600', '22650', '22700', '22750', '22800', '22850', '22900', '22950', '23000', '23050', '23100', '23150', '23200', '23250', '23300', '23350', '23400', '23420', '23440', '23460', '23480', '23490', '23500', '23550', '23600', '23650', '23700', '23720', '23740', '23760', '23780', '23790', '23800', '23850', '23900', '23950', '24000', '24050', '24100', '24150', '25000', '25060', '25100', '25200', '25260', '25270', '25300', '25360', '25370', '25400', '25410', '25420', '25500', '25510', '25520', '25560', '25570', '25600', '25605', '25610', '25615', '25620', '25625', '25630', '25635', '25640', '25645', '25650', '25655', '25700', '25710', '25720', 
'25760', '25770', '25800', '25805', '25810', '25830', '25840', '25850', '40650', '40750', '40780', '40810', '40850', '40950', '42000', '42030', '42060', '42100', '42130', '42160', '42200', '42230', '42300', '42330', '42360', '42400', '42430', '42460', '42500', '42530', '42560', '42600', '42630', '42660', '42690', '42720', '42750', '42780', '42810', '42840', '60600', '60630', '60650', '60700', '60730', '60750', '60800', '61000', '61100', '61130', '61200', '61230', '61300', '61330', '61360', '61380', '61400', '61430', '61500', '61530', '61600', '61630', '61650', '61690', '61691', '61692', '61693', '61694', '61695', '61696', '61697', '61698', '61699', '61700', '61701', '61702', '61703', '61704', '61710', '61711', '61712', '61713', '61714', '61715', '61716', '61717', '61718', '61719', '61720', '61721', '61722', '61723', '61724', '61725', '61726', '61727', '61728', '61729', '61730', '61740', '61741', '61742', '61743', '61744', '61745', '61746', '61747', '61748', '61749', '61750', '61751', '61752', '61753', '61754', '61755', '61756', '61757', '61758', '61759', '61760', '65000', '65040', '65060', '65100', '65120', '70400', '80000', '80111', '80112', '80120', '80200', '81000', '81050', '81100', '81150', '81200', '81250', '82000', '82200', '82350', '90000', '90200', '90400', '90500', '90700', '90900', '90950', '91000', '91200', '91400', '91500', '91700', '91900', '91950', '92000', '100000', '100050', '100100', '100150', '100200', '100250', '100300', '100350', '100400', '100450', '100500', '100550', '100600', '100650', '100700', '100750', '100800', '100850', '100900', '100950', '101000', '101050', '101100', '102000', '102050', '102100', '102150', '102200', '102250', '102300', '102350', '102400', '102450', '102500', '102550', '102600', '102650', '102700', '102750', '102800', '102850', '102900', '102950', '103000', '103050', '103100', '104000', '104050', '104100', '104150', '104200', '104250', '104300', '104350', '104400', '104450', '104500', '104550', '104600', '104650', 
'104700', '104750', '104800', '104850', '104900', '104950', '105000', '105050', '105100', '106000', '106050', '106100', '106150', '106200', '106250', '106300', '106350', '106400', '106450', '106500', '106550', '106600', '106650', '106700', '106750', '106800', '106850', '106900', '106950', '107000', '107050', '107100', '200200', '200400', '200600', '200800', '229000', '229200', '229400', '229600', '229800', '230000', '230200', '230400', '230600', '230800', '231000', '231200', '231400', '231600', '231800', '232000', '232200', '232400', '232600', '232800', '233000', '233200', '233400', '233600', '233800', '234000', '234200', '234400', '234600', '234800', '235000', '235200', '235400', '235600', '235800', '236000', '236200', '236400', '236600', '236800', '237000', '237200', '237400', '237600', '237800', '238000', '238200', '238400', '238600', '238800', '239000', '239200', '239400', '239600', '239800', '240000', '240200', '240400', '240600', '240800', '241000', '241200', '241400', '241600', '241800', '242000', '242200', '242400', '242600', '242800', '243000', '243200', '243400', '243600', '243800', '244000', '244200', '244400', '244600', '244800', '245000', '245200', '245400', '245600', '245800', '246000', '246200', '246400', '246600', '246800', '247000', '247200', '247400', '250000', '251000', '260000', '260200', '260400', '260600', '260800', '261000', '261200', '261400', '261600', '261800', '262000', '262200', '262400', '262600', '263000', '263200', '263400', '263600', '264000', '264200', '264400', '264600', '265000', '265200', '265400', '265600', '266000', '266200', '266400', '266600', '267000', '267200', '267400', '267600', '268000', '268200', '268400', '268600', '269000', '269200', '269400', '269600', '270000', '270200', '270400', '270600', '271000', '271200', '271400', '271600', '272000', '272200', '272400', '273000', '273200', '273400', '274000', '274200', '274400', '275000', '275200', '275400', '280000', '280200', '280400', '281000', '281200', '281400', '282000', 
'282200', '282400', '283000', '283200', '283400', '284000', '284200', '284400', '285000', '285200', '285400', '288000', '290000', '290200', '290400', '291000', '291200', '291400', '292000', '292200', '292400', '292600', '293000', '295000', '297000', '299000', '301000', '303000', '303200', '303400', '303600', '303800', '304000', '304200', '304400', '304600', '304800', '305000', '305200', '305400', '305600', '305800', '306000', '306200', '306400', '307000', '309000', '311000', '313000', '315000', '315200', '315400', '315600', '315800', '316000', '316200', '316400', '316600', '316700', '316900', '317100', '317300', '317500', '317700', '317900', '318100', '318300', '318500', '318700', '318900', '319100', '319300', '319500', '319700', '319900', '320100', '320300', '320500', '320700', '320900', '321100', '321300', '321500', '321700', '321900', '322100', '322300', '322500', '322700', '322900', '323100', '323300', '323500', '323700', '323900', '324100', '324300', '324500', '324700', '324900', '325100', '500000', '502000', '504000', '506000', '900000', '900100', '900200', '900300', '900400', '900500', '900600', '900700', '900800', '900900', '901000', '901100', '901200', '901300', '2000000', '2000200', '2000400', '2000600', '2000800', '2001000', '2001200', '2001400', '2001600', '2001800', '2002000', '2002200', '2002400', '2002600', '2002800', '2003000', '2003200', '2003400', '2003600', '2003800', '2004000', '2004200', '2004400', '2004600', '2004800', '2005000', '2005200']
19 
# Weight-name suffix -> PDF IDs. makePowhegDatabase() appends '_PDF<id>' to a
# scale-variation label when the weight name ends with the suffix and some
# weight name ends with <id>.
NNLO_PDF = {
    'MMHT': ['25200', '25410', '25400'],
    'CT14': ['13060', '13061', '13062', '13063', '13064', '13165', '13166', '13167', '13168', '13169', '13170', '13190', '13192', '13200', '13201', '13202', '13203', '13205', '13206', '13207', '13208', '13065', '13191'],
    'PDF4LHC15_NLO_30': ['90900'],
    'PDF4LHC15': ['92000', '90900', '91900', '91700', '90700'],
    'NNPDF_NLO_0117': ['265000'],
    'NNPDF_0117': ['265000'],
    'NNPDF_NLO_0119': ['266000'],
    'NNPDF31_NLO_0118': ['303400'],
    'NNPDF30_NLO_0118': ['260000'],
}

# Every PDF ID of NNLO_PDF, flattened in definition order (duplicates kept).
list_of_NNLOPDF = [y for x in NNLO_PDF.values() for y in x]

# PDF IDs that are labelled as alphaS variations -> tag used in the
# database key built by alph().
alpha_S = {
    '265000': 'NNPDF_NLO',
    '266000': 'NNPDF_NLO',
    '270000': 'NNPDF_NNLO',
    '269000': 'NNPDF_NNLO',
    '91931': 'PDF4LHC_NNLO',
    '91932': 'PDF4LHC_NNLO',
    '90931': 'PDF4LHC_NLO',
    '90932': 'PDF4LHC_NLO',
}

# alpha_ID = [y for x in alpha_S.values() for y in x]

# Group name -> weight-name prefixes; weightData() matches weight labels
# against the flattened prefixes below.
NNLO = {
    'NNLO_scale': ['nnlops-nnloUp', 'nnlops-nnloDn', 'nnlops-nnloNom'],
    'TopBottomMass_NLO': ['mtmb-bminlo', 'mtinf', 'mtmb'],
    'TopBottomMass_NNLO': ['nnlops-bminlo', 'nnlops-mtinf'],
    'NOMINAL_NNLO_NNPDF': ['nnlops-nominal'],
    'NOMINAL_NNLO_PDF4LHC': ['nnlops-nominal-pdflhc'],
}

list_NNLO = [y for x in NNLO.values() for y in x]
# ext = ['scale', 'hessian']

# PDF IDs whose member variations are labelled '..._hessian'.
Hessian = ['90900', '90700', '91900', '91700', '90400']

# DSIDs that get special treatment in makePowhegDatabase().
# Built as a real list: on Python 3 `range(...) + range(...)` raises TypeError.
# The (502198, 502354) span is listed twice, as in the original expression, so
# the resulting list is element-for-element what Python 2 produced.
affectedDSID = [
    dsid
    for first, stop in (
        (502198, 502354),
        (302266, 302291),
        (307731, 307748),
        (450765, 450774),
        (367515, 367671),
        (502198, 502354),
    )
    for dsid in range(first, stop)
] + [309529, 312655, 412006]

# The original read `range(364700 - 364713)`, i.e. range(-13), which is empty;
# the two numbers are the bounds of the DSID span.
pythiaDSID = list(range(364700, 364713))
37 
38 
def makefile(filename):
    """Create (or overwrite) *filename* so that it holds an empty mapping, '{}'."""
    empty_mapping = '{}'
    with open(filename, 'w+') as handle:
        handle.write(empty_mapping)
42 
43 
def file_is_empty(path):
    """Return True when the file at *path* has a size of zero bytes."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes == 0
46 
47 
def hasNumbers(inputString):
    """Return True if at least one character of *inputString* is a digit."""
    for character in inputString:
        if character.isdigit():
            return True
    return False
50 
51 
def hes(gen, dictionary, weight_dict):
    """Collect the '..._hessian' weights of *dictionary* into one database entry.

    Parameters:
        gen: generator name, used as prefix of the database key.
        dictionary: label -> weight name.
        weight_dict: database entries built so far; updated in place.

    Returns:
        (dictionary, weight_dict, hess_dict). When hessian labels are present
        the returned dictionary is a new dict without them, plus one entry for
        the '_0_hessian' member stored under its label with that tag removed;
        hess_dict holds just that entry. Without hessian labels the inputs are
        returned unchanged with an empty hess_dict.

    Raises:
        IndexError: hessian labels exist but none contains '_0_hessian'.
        StopIteration: the first hessian label has no trailing PDF number.
    """
    hessian = {key: value for key, value in dictionary.items() if 'hessian' in key}
    hess_dict = {}
    if not hessian:
        return dictionary, weight_dict, hess_dict

    central = {key: value for key, value in hessian.items() if '_0_hessian' in key}
    if not central:
        # Same exception type the original list indexing produced.
        raise IndexError("hessian weights found but no '_0_hessian' member")
    # dict views cannot be indexed on Python 3, so take the first item explicitly.
    central_key, central_weight = next(iter(central.items()))
    first_label = next(iter(hessian))
    pdf = next(re.finditer(r'\d+$', first_label.split('_')[0])).group(0)

    if len(hessian) > 1:
        weight_dict.update({gen + '_ME_PDF' + pdf + '_Hessian': {'weights': sorted(hessian.values()), 'combination': 'lhapdf', 'type': 'PDF_ME', 'nominal': central_weight, 'nominal_pdf': pdf}})
    # NOTE(review): the listing this was recovered from had lost its indentation;
    # the three statements below are taken to run for any non-empty hessian
    # group, not only when it has more than one member -- confirm.
    dictionary = {key: value for key, value in dictionary.items() if key not in hessian}
    hess_dict[central_key.replace('_0_hessian', '')] = central_weight  # copy without hessian tag
    dictionary.update(hess_dict)
    return dictionary, weight_dict, hess_dict
66 
67 
def nom(gen, dictionary, weight_dict):
    """Register the nominal-PDF weights (labels containing 'Nominal').

    Parameters:
        gen: generator name, used as prefix of the database key.
        dictionary: label -> weight name.
        weight_dict: database entries built so far; updated in place.

    Returns:
        (dictionary, weight_dict, p_nom). p_nom is the nominal PDF ID as a
        string, or '-1' when *dictionary* has no 'Nominal' label. When nominal
        labels exist the returned dictionary is a new dict without them.
    """
    nominal = {key: value for key, value in dictionary.items() if 'Nominal' in key}
    p_nom = '-1'
    if not nominal:
        return dictionary, weight_dict, p_nom

    # dict views cannot be indexed on Python 3; materialise them once.
    candidates = list(nominal.values())
    n = candidates[0].strip()
    # The first weight name may carry no number; fall back to the second one,
    # but only if it exists (the original indexed [1] unconditionally).
    if not hasNumbers(n) and len(candidates) > 1:
        n = candidates[1].strip()
    pdf = getPDF(n)

    # Split the ID into base set + member: try a two-digit member first and
    # fall back to a three-digit member when the base is not a known set.
    p_var = str(int(pdf) % 100)
    if str(int(pdf) - int(p_var)) in pdfsets:
        p_nom = str(int(pdf) - int(p_var))
    else:
        p_var = str(int(pdf) % 1000)
        p_nom = str(int(pdf) - int(p_var))

    entry = {'weights': sorted(nominal.values()), 'combination': 'lhapdf', 'type': 'PDF_ME', 'nominal': nominal.get('Nominal'), 'nominal_pdf': p_nom}
    if len(nominal) > 1:
        weight_dict.update({gen + '_ME_PDF' + p_nom + '_var': entry})
    else:
        weight_dict.update({gen + '_ME_PDF' + p_nom: entry})
    dictionary = {key: value for key, value in dictionary.items() if key not in nominal}
    return dictionary, weight_dict, p_nom
90 
91 
def alph(gen, dictionary, weight_dict, nom_pdf, p_nom):
    """Collect the '..._alphaS' weights of *dictionary* into one database entry.

    Parameters:
        gen: generator name, used as prefix of the database key.
        dictionary: label -> weight name.
        weight_dict: database entries built so far; updated in place.
        nom_pdf: nominal weight name stored in the entry.
        p_nom: nominal PDF ID (string) used in the key and stored in the entry.

    Returns:
        (dictionary, weight_dict, alpha_dict). When alphaS labels are present
        the returned dictionary is a new dict in which each of them is replaced
        by the same label without the '_alphaS' suffix; alpha_dict holds those
        renamed entries. Otherwise the inputs are returned with an empty dict.
    """
    alpha = {key: value for key, value in dictionary.items() if key.endswith('_alphaS')}
    alpha_dict = {}
    if not alpha:
        return dictionary, weight_dict, alpha_dict

    tags = set()
    for label, weight in alpha.items():
        pdf = next(re.finditer(r'\d+$', label.split('_')[0])).group(0)
        tags.add(alpha_S.get(pdf))
        alpha_dict[label.replace('_alphaS', '')] = weight
    # Sorted so the key is reproducible: iterating a set of strings changes
    # order from run to run under hash randomisation.
    astr = '_'.join(sorted(tags))
    weight_dict.update({gen + '_ME_PDF' + p_nom + '_alphaS_' + astr: {'weights': sorted(alpha.values()), 'combination': 'alphaS', 'type': 'alphaS', 'nominal': nom_pdf, 'nominal_pdf': p_nom}})
    dictionary = {key: value for key, value in dictionary.items() if key not in alpha}
    dictionary.update(alpha_dict)
    return dictionary, weight_dict, alpha_dict
108 
109 
def alternate(gen, dictionary, weight_dict, p_nom):
    """Register the remaining labels that contain 'PDF' as PDF variations.

    Labels are grouped by the number getPDF() extracts from them and each
    group becomes one '<gen>_ME_PDF<id>_var' entry of *weight_dict*.
    *p_nom* is accepted but not used.

    Returns:
        (dictionary, weight_dict); when matching labels exist the returned
        dictionary is a new dict without them.
    """
    alt = {label: weight for label, weight in dictionary.items() if 'PDF' in label}
    if not alt:
        return dictionary, weight_dict

    grouped_ids = []
    for label, weight in alt.items():
        pdf = getPDF(label)
        # NOTE(review): getPDF() returns an int while pdfsets holds strings, so
        # this test never succeeds and every label ends up in grouped_ids.
        # Kept as-is: changing it would alter the generated database.
        if pdf in pdfsets:
            weight_dict.update({gen + '_ME_PDF' + pdf: {'weights': [weight], 'combination': 'lhapdf', 'type': 'altPDF', 'nominal': weight, 'nominal_pdf': pdf}})
        else:
            grouped_ids.append(pdf)

    for p in list(set(grouped_ids)):
        tag = str(p)
        members = sorted(weight for label, weight in alt.items() if tag in label)
        weight_dict.update({gen + '_ME_PDF' + tag + '_var': {'weights': members, 'combination': 'lhapdf', 'type': 'PDF_ME', 'nominal': members[0], 'nominal_pdf': p}})

    remaining = set(dictionary.items()) - set(alt.items())
    dictionary = dict(remaining)
    return dictionary, weight_dict
130 
131 
def pyth(dictionary, weight_dict, p_nom, nom):
    """Sort the 'Py8...' labels of *dictionary* into shower-variation entries.

    The weight name (the dict value) decides the group: names starting with
    'Var', 'hard', 'isr: PDF', or starting with 'isr' and containing 'fsr'.
    Anything left over is stored under 'Pythia8_Others'.

    Returns:
        (dictionary, weight_dict); when 'Py8' labels exist the returned
        dictionary is a new dict without them.
    """
    py8 = {label: weight for label, weight in dictionary.items() if label.startswith('Py8')}
    if not py8:
        return dictionary, weight_dict

    key_prefix = 'Pythia8_PDF' + p_nom
    # (key suffix, entry type, predicate on the weight name), in insertion order.
    groups = (
        ('_Var3c_alphaS', 'alphaS', lambda name: name.startswith('Var')),
        ('_non_singular', 'PDF_ME', lambda name: name.startswith('hard')),
        ('_ISR_PDF', 'PDF_ME', lambda name: name.startswith('isr: PDF')),
        ('_ISR_FSR_alphaS', 'Scale_ME', lambda name: name.startswith('isr') and 'fsr' in name),
    )
    classified = set()
    for suffix, entry_type, belongs in groups:
        selected = {label: weight for label, weight in py8.items() if belongs(weight)}
        if selected:
            weight_dict.update({key_prefix + suffix: {'weights': sorted(selected.values()), 'combination': 'envelope', 'type': entry_type, 'nominal': nom, 'nominal_pdf': p_nom}})
            classified.update(selected)

    dictionary = {label: weight for label, weight in dictionary.items() if label not in py8}
    leftovers = sorted(weight for label, weight in py8.items() if label not in classified)
    if leftovers:
        weight_dict.update({'Pythia8_Others': {'weights': leftovers, 'combination': 'none', 'type': 'none'}})
    return dictionary, weight_dict
152 
153 
def gotoWeightData(weight_dict):
    """Merge *weight_dict* into Weight_Database.yaml and return the keys used.

    For every entry of *weight_dict*:
      * key not yet in the database -> stored under that key;
      * key present and some database entry equals the new one -> the key of
        the first equal entry is reused, nothing is written;
      * key present but no entry is equal -> stored under
        '<key>_type<N>', N being one more than the number of database keys
        that contain <key>.

    The file is created if missing and rewritten only when something changed.

    Returns:
        list of database keys, one per entry of *weight_dict*, in its order.
    """
    db_path = "%s/data/Weight_Database.yaml" % SystToolsPath
    if not os.path.exists(db_path):
        makefile(db_path)

    keys = []  # list of keys for the dsid in database
    modified = False
    with open(db_path, 'r+') as w_file:
        # safe_load: plain yaml.load() without a Loader is rejected by
        # PyYAML >= 6, and the database only holds dicts/lists/scalars.
        # `or {}` covers a file that exists but is empty (loads as None).
        data = yaml.safe_load(w_file) or {}
        for key, entry in weight_dict.items():
            if key not in data:
                data[key] = entry
                modified = True
                keys.append(key)
                continue
            # dict views cannot be indexed on Python 3: search explicitly for
            # the first stored key whose entry equals the new one.
            same_entry_key = next((k for k, stored in data.items() if stored == entry), None)
            if same_entry_key is not None:
                keys.append(same_entry_key)
                continue
            variants = sum(1 for k in data if key in k)
            new_key = key + '_type' + str(variants + 1)
            data[new_key] = entry
            modified = True
            keys.append(new_key)
        if modified:
            w_file.seek(0)
            yaml.dump(data, w_file, default_flow_style=False)
            w_file.truncate()
    return keys
181 
182 
def getMGkeys(dsid, dictionary):
    """Build the database entries for a MadGraph sample and return their keys.

    The nominal PDF ID is taken, in this order, from the nominal weight name,
    from hard-coded DSID ranges, or from the 'lhaid' setting found in the
    sample's job options under /cvmfs (or in the MadGraphControl file those
    job options include).

    Parameters:
        dsid: dataset ID as a string of digits.
        dictionary: label -> weight name, as built by makePowhegDatabase().

    Returns:
        ['nominal'] when no PDF could be determined and *dictionary* has a
        single entry; otherwise the keys returned by gotoWeightData().

    NOTE(review): the listing this was recovered from had lost its indentation.
    Nesting below follows the data flow; in particular the DSID-range checks
    are taken to be independent of `if nom_pdf`, and the dictionary rewrite
    after the scale entry is taken to sit inside `if scale` -- confirm.
    """
    PDFset = {'262000': "NNPDF30_lo_as_0118", '261000': "NNPDF30_nnlo_as_0118", '263000': "NNPDF30_lo_as_0130"}
    jo_root = '/cvmfs/atlas.cern.ch/repo/sw/Generators/MC15JobOptions/latest/'
    weight_dict = {}
    flag2 = True
    did = int(dsid)
    p_nom = '-1'
    nom_pdf = dictionary.get('Nominal')
    if nom_pdf:
        for key, val in PDFset.items():
            if val in nom_pdf:
                p_nom = key
                print("Nominal PDF =" + p_nom)
                flag2 = False

    if 502198 <= did <= 502353 or 367515 <= did <= 367670 or did == 412006:
        p_nom = '260000'
        flag2 = False
    elif 302266 <= did <= 302290 or 307731 <= did <= 307748 or did == 309529 or 450765 <= did <= 450774:
        p_nom = '247000'
        print("got HVT" + p_nom)
        flag2 = False
    elif did == 312655:
        p_nom = '263000'
        flag2 = False

    if flag2 is True and p_nom == '-1':
        # Floor division: on Python 3 `/` yields a float and the directory
        # would be named e.g. 'DSID502.198xxx'.
        d = str(did // 1000)
        filename = jo_root + 'share/DSID' + d + 'xxx/'
        for files in os.listdir(filename):
            if dsid in files:
                filename = filename + files
                break
        flag = True
        bfline = ''
        with open(filename, 'r') as f:
            for fline in f:
                if 'lhaid' in fline:
                    numbers = re.findall(r'\d+', fline.replace('\'', '&').replace('\"', '&'))
                    if numbers:  # ignore a 'lhaid' line that carries no number
                        p_nom = numbers[0]
                        flag = False
                elif "include" in fline and "MC15JobOptions/MadGraphControl" in fline:
                    bfline = fline
        if flag:
            # No lhaid in the job options: look into the included control file.
            if '\'' in bfline:
                included = bfline.split('\'')[1]
            elif '\"' in bfline:
                included = bfline.split('\"')[1]
            else:
                included = bfline
            mgfile = included.replace('MC15JobOptions', jo_root + 'common/MadGraph')
            if os.path.exists(mgfile):
                with open(mgfile, 'r') as cf:
                    for fline in cf:
                        if 'lhaid' in fline:
                            numbers = re.findall(r'\d+', fline.replace('\'', '&').replace('\"', '&'))
                            if numbers:
                                p_nom = numbers[0]

    scale = {key: value for key, value in dictionary.items() if key.endswith('scale') or key == 'Nominal'}
    if p_nom == '-1' and len(dictionary) == 1:
        return ['nominal']
    if scale:
        weight_dict.update({'MadGraph_ME_PDF' + p_nom + '_scale': {'weights': sorted(scale.values()), 'combination': 'envelope', 'type': 'scale_ME', 'nominal': nom_pdf, 'nominal_pdf': p_nom}})
        dictionary = dict(set(dictionary.items()) - set(scale.items()))
        # 'Nominal' was removed together with the scale weights; put it back.
        dictionary.update({'Nominal': nom_pdf})
    nominal = {key: value for key, value in dictionary.items() if 'Nominal' in key}
    if len(nominal) > 1:
        weight_dict.update({'MadGraph_ME_PDF' + p_nom + '_var': {'weights': sorted(nominal.values()), 'combination': 'lhapdf', 'type': 'PDF_ME', 'nominal': nominal.get('Nominal'), 'nominal_pdf': p_nom}})
    weight_dict.update({'MadGraph_PDF' + p_nom + '_Nominal': {'type': 'Nominal', 'weights': [nom_pdf], 'combination': 'none', 'nominal': nom_pdf, 'nominal_pdf': p_nom}})
    dictionary = dict(set(dictionary.items()) - set(nominal.items()))
    if dictionary:
        weight_dict.update({'MadGraph_Other': {'weights': sorted(dictionary.values()), 'combination': 'none', 'type': 'unknown'}})
    keys = gotoWeightData(weight_dict)
    return keys
254 
255 
def weightData(gen, dictionary):
    """Turn a label -> weight-name mapping into database entries and store them.

    Parameters:
        gen: 'Sherpa' or 'MGPy' (both stored with a 'Sherpa' prefix), or
            'Powheg', 'aMcAtNlo' or 'MadGraph' (stored with *gen* as prefix).
            Any other value produces no entries.
        dictionary: label -> weight name, as built by makeSherpaDatabase() or
            makePowhegDatabase().

    Returns:
        The list of database keys returned by gotoWeightData().

    NOTE(review): the listing this was recovered from had lost its indentation;
    the nesting inside the two loops of the Powheg branch follows the data
    flow -- confirm against the repository version.
    """
    def _minus(source, removed):
        # Drop the (label, weight) pairs of `removed` from `source`; a label
        # whose weight differs between the two is deliberately kept.
        return dict(set(source.items()) - set(removed.items()))

    weight_dict = {}
    if gen == 'Sherpa' or gen == 'MGPy':
        nom_pdf = dictionary.get('Nominal')
        print(nom_pdf)
        me_weights = {key: value for key, value in dictionary.items() if key.startswith('ME_ONLY_')}
        dictionary = _minus(dictionary, me_weights)
        scale_weights = {key: value for key, value in dictionary.items() if key.endswith('scale') or key == 'Nominal'}
        # list(...) because dict views cannot be indexed on Python 3.
        pdf = str(getPDF(list(scale_weights.values())[0]))
        if me_weights:
            weight_dict.update({'Sherpa_ME_ONLY_PDF' + pdf: {'weights': sorted(me_weights.values()), 'combination': 'none', 'type': 'matrix element'}})
        weight_dict.update({'Sherpa_ME_PDF' + pdf + '_scale': {'weights': sorted(scale_weights.values()), 'combination': 'envelope', 'type': 'scale_ME', 'nominal': nom_pdf, 'nominal_pdf': pdf}})
        dictionary = _minus(dictionary, scale_weights)
        dictionary.update({'Nominal': nom_pdf})
        dictionary, weight_dict, p_nom = nom('Sherpa', dictionary, weight_dict)
        weight_dict.update({'Sherpa_PDF' + pdf + '_Nominal': {'type': 'Nominal', 'weights': [nom_pdf], 'combination': 'none', 'nominal': nom_pdf, 'nominal_pdf': pdf}})
        dictionary, weight_dict, alpha_dict = alph('Sherpa', dictionary, weight_dict, nom_pdf, pdf)
        dictionary, weight_dict, hess_dict = hes('Sherpa', dictionary, weight_dict)
        dictionary = _minus(dictionary, alpha_dict)
        dictionary = _minus(dictionary, hess_dict)
        other = {key: value for key, value in dictionary.items() if key.startswith('other_')}
        dictionary = _minus(dictionary, other)
        dictionary, weight_dict = alternate('Sherpa', dictionary, weight_dict, nom_pdf)
        if dictionary or other:
            dictionary.update(other)
            weight_dict.update({'Sherpa_Other': {'weights': sorted(dictionary.values()), 'combination': 'none', 'type': 'unknown'}})
    elif gen == 'Powheg' or gen == 'aMcAtNlo' or gen == 'MadGraph':
        nom_pdf = dictionary.get('Nominal')
        print(nom_pdf)
        dictionary, weight_dict, p_nom = nom(gen, dictionary, weight_dict)
        print(p_nom)
        weight_dict.update({gen + '_PDF' + p_nom + '_Nominal': {'type': 'Nominal', 'weights': [nom_pdf], 'combination': 'none', 'nominal': nom_pdf, 'nominal_pdf': p_nom}})
        dictionary.update({'Nominal': nom_pdf})
        dictionary, weight_dict = pyth(dictionary, weight_dict, p_nom, nom_pdf)
        scale_weights = {key: value for key, value in dictionary.items() if key.endswith('scale') or key == 'Nominal'}
        weight_dict.update({gen + '_ME_PDF' + p_nom + '_scale': {'weights': sorted(scale_weights.values()), 'combination': 'envelope', 'type': 'scale_ME', 'nominal': nom_pdf, 'nominal_pdf': p_nom}})
        dictionary = _minus(dictionary, scale_weights)
        dictionary, weight_dict, alpha_dict = alph(gen, dictionary, weight_dict, nom_pdf, p_nom)
        dictionary, weight_dict, hess_dict = hes(gen, dictionary, weight_dict)
        # Scale variations evaluated with an alternative PDF: labels ending in its ID.
        for pdf in list_of_NNLOPDF:
            temp = {key: value for key, value in dictionary.items() if key.endswith(pdf)}
            if len(temp) > 1:
                ordered = sorted(temp.values())
                weight_dict.update({gen + '_ME_PDF' + pdf + '_scale': {'weights': ordered, 'combination': 'envelope', 'type': 'Scale_ME', 'nominal': ordered[-1], 'nominal_pdf': pdf}})
                dictionary = _minus(dictionary, temp)
                if any(pdf in d for d in dictionary.keys()):
                    dictionary.update({'PDF' + pdf: ordered[-1]})
        dictionary = _minus(dictionary, alpha_dict)
        for w in list_NNLO:
            temp = {key: value for key, value in dictionary.items() if key.startswith(w)}
            if temp:
                # NOTE(review): this picks a *label* out of temp, yet the tests
                # below look for 'scale' / 'NOMINAL', which are substrings of
                # the group names in the module-level NNLO dict. NNLO.items()
                # may have been intended; left unchanged because it decides
                # the database key.
                container = list(k for k, v in temp.items() if w in v)[0]
                if 'scale' in container:
                    weight_dict.update({gen + '_' + container: {'weights': sorted(temp.values()), 'combination': 'envelope', 'type': 'Scale_ME', 'nominal': nom_pdf, 'nominal_pdf': p_nom}})
                elif 'NOMINAL' in container:
                    weight_dict.update({gen + '_' + container: {'weights': sorted(temp.values()), 'combination': 'none', 'type': 'PDF_ME', 'nominal_pdf': p_nom}})
                else:
                    weight_dict.update({gen + '_' + container: {'weights': sorted(temp.values()), 'combination': 'none', 'type': 'unknown', 'nominal_pdf': p_nom}})
                dictionary = _minus(dictionary, temp)
        dictionary = _minus(dictionary, hess_dict)
        dictionary, weight_dict = alternate(gen, dictionary, weight_dict, p_nom)
        if dictionary:
            weight_dict.update({gen + '_Other': {'weights': sorted(dictionary.values()), 'combination': 'none', 'type': 'unknown'}})
    keys = gotoWeightData(weight_dict)
    return keys
322 
323 
def scale(value):
    """Translate a scale-variation weight name into a short label.

    Returns:
        'other_<stripped name>' if the name contains 'dyn_scale_choice';
        'Nominal' if it contains 'mur=1' and 'muf=1', or 'muR=1.000000E+00'
        and 'muF=1.000000E+00'; otherwise the concatenation of 'muR<d>' /
        'muF<d>' tags found in the name, with <d> one of 25, 2, 5, 4. The
        result is '' when no tag could be derived.
    """
    if 'dyn_scale_choice' in value:
        return 'other_' + value.strip()
    if 'mur=1' in value and 'muf=1' in value:
        return 'Nominal'
    # The original repeated this test in a second, unreachable elif.
    if 'muR=1.000000E+00' in value and 'muF=1.000000E+00' in value:
        return 'Nominal'

    # Order matters: '25' has to be tried before '2' and '5'.
    digit_tags = ('25', '2', '5', '4')
    ph_name = ''
    for part in re.split('_| ', value):
        lowered = part.lower()
        # A single part may mention both scales, so both are always checked.
        for marker, tag in (('mur', 'muR'), ('muf', 'muF')):
            if marker not in lowered:
                continue
            for digits in digit_tags:
                if digits in part:
                    ph_name = ph_name + tag + digits
                    break
    return ph_name
356 
357 
def getPDF(ss):
    """Return the largest integer spelled out by a run of digits in *ss*.

    Only the ASCII digits 0-9 count. The result is 0 when *ss* holds no digit.
    """
    digit_runs = re.findall(r'[0-9]+', ss)
    return max((int(run) for run in digit_runs), default=0)
370 
371 
def makeMGDatabase(values):
    """Map each weight name in *values* to a label and return label -> name.

    Names containing 'mur' (any case) get the scale() label plus '_scale';
    names containing 'Member' become 'Nominal' (member '0') or
    'Nominal_<member>', the member being the second whitespace-separated
    token; the exact name 'nominal' becomes 'Nominal'; anything else keeps
    its stripped name. Later names overwrite earlier ones with the same label.
    The number of labels is printed before returning.
    """
    labelled = {}
    for weight_name in values:
        if 'mur' in weight_name.lower():
            label = scale(weight_name) + '_scale'
        elif 'Member' in weight_name:
            member = weight_name.split()[1]
            label = 'Nominal' if member == '0' else 'Nominal_' + member
        elif weight_name == 'nominal':
            label = 'Nominal'
        else:
            label = weight_name.strip()
        labelled[label] = weight_name
    print(len(labelled))
    return labelled
393 
394 
# ########### makePowhegDatabase ###################
# PDF IDs treated as the nominal set when a weight name has the
# '<pdf> ... <memberid>' form.
MGcentral_pdf = ['247000', '260000']


def makePowhegDatabase(dsid, values):
    """Map each weight name in *values* to a label and return label -> name.

    Labels encode the kind of variation: 'Nominal' / 'Nominal_<member>',
    '<tags>_scale' (optionally followed by '_PDF<id>'), 'PDF<id>...'
    (optionally ending in '_alphaS' or '_hessian'), 'other_<name>',
    'Py8_<name>', or the stripped name itself. ':' in a label is replaced by
    '_'. Later names overwrite earlier ones with the same label. The number
    of labels is printed before returning.

    Parameters:
        dsid: dataset ID (string of digits); decides whether the special
            handling for the DSIDs in affectedDSID applies.
        values: list of weight names.

    NOTE(review): the listing this was recovered from had lost its indentation;
    the nesting of the scale branch follows the data flow. With this nesting a
    label built by the ','-split fallback receives '_scale' twice, which is
    harmless (consumers test `endswith('scale')`) -- confirm.
    """
    # The original tested `(dsid == d for d in affectedDSID)`: a bare generator
    # is always truthy, so every otherwise unclassified weight was labelled
    # 'Nominal'. dsid also arrives as a string while affectedDSID holds ints.
    try:
        dsid_is_affected = int(dsid) in affectedDSID
    except (TypeError, ValueError):
        dsid_is_affected = False

    dic = {}
    for value in values:
        stripped = value.strip()
        lowered = value.lower()
        ph_name = ''
        if stripped.lower() == 'nominal':
            ph_name = 'Nominal'
        elif 'mur' in lowered or 'muf' in lowered:
            if 'isr' in value or 'dyn_scale_choice' in value:
                ph_name = 'other_' + value
            else:
                ph_name = scale(stripped)
                if ph_name == '':
                    # scale() found nothing: fall back to up/down tags taken
                    # from the comma-separated pieces of the name.
                    for element in value.split(','):
                        piece = element.lower()
                        if 'mur' in piece and '5' in element:
                            ph_name = ph_name + 'muRdw'
                        elif 'mur' in piece and '2' in element:
                            ph_name = ph_name + 'muRup'
                        elif 'muf' in piece and '5' in element:
                            ph_name = ph_name + 'muFdw'
                        elif 'muf' in piece and '2' in element:
                            ph_name = ph_name + 'muFup'
                    ph_name = ph_name + '_scale'
                if ph_name != 'Nominal':
                    ph_name = ph_name + '_scale'
                for key in NNLO_PDF.keys():
                    if stripped.endswith(key):
                        for pd in NNLO_PDF.get(key):
                            if any(k.strip().endswith(pd) for k in values):
                                ph_name = ph_name + '_PDF' + pd
        elif 'pdf' in lowered and 'memberid' in lowered:
            tokens = value.split()
            pdf = str(getPDF(tokens[0]))
            p_var = str(getPDF(tokens[1]))
            if pdf in MGcentral_pdf:
                ph_name = 'Nominal' if p_var == '0' else 'Nominal_' + p_var
            else:
                ph_name = 'PDF_' + pdf + '_' + p_var
        elif 'pdf' in lowered and any(char.isdigit() for char in stripped):
            pdf = str(getPDF(stripped))
            if pdf in alpha_S:
                ph_name = 'PDF' + pdf + '_alphaS'
            elif any(str(int(pdf) - 1) in val or str(int(pdf) + 1) in val for val in values):
                # A neighbouring ID exists, so this is a member of a set:
                # split into base ID and member number (2 digits, else 3).
                p_var = str(int(pdf) % 100)
                if str(int(pdf) - int(p_var)) in pdfsets:
                    p_nom = str(int(pdf) - int(p_var))
                else:
                    p_var = str(int(pdf) % 1000)
                    p_nom = str(int(pdf) - int(p_var))
                if any(p_nom in val for val in values):
                    if p_nom in Hessian:
                        ph_name = 'PDF' + p_nom + '_' + p_var + '_hessian'
                    elif p_var == '0':
                        ph_name = 'PDF' + p_nom
                    elif p_nom == '260000':
                        ph_name = 'Nominal_' + p_var
                    else:
                        ph_name = 'PDF' + p_nom + '_' + p_var
                else:
                    ph_name = 'Nominal_' + p_var
            else:
                ph_name = 'PDF' + pdf
        elif stripped != '9' and dsid_is_affected:
            # (the original `!= '9' or == ''` reduces to `!= '9'`)
            print("nominal found: " + value)
            ph_name = 'Nominal'
        elif stripped == '110' and dsid_is_affected:
            # NOTE(review): still unreachable -- '110' already satisfies the
            # previous test. Confirm which weight names the two branches were
            # meant to select before reordering them.
            print("alt. PDF found")
            ph_name = 'PDF246800'
        else:
            if value.startswith('Var3c') or value.startswith('hard') or value.startswith('isr'):
                ph_name = 'Py8_' + value
            else:
                ph_name = stripped
        ph_name = ph_name.replace(':', '_')
        dic[ph_name] = value
    print(len(dic))
    return dic
491 
492 
def makeSherpaDatabase(values):
    """Map each Sherpa weight name in *values* to a label; return label -> name.

    A single-entry list is stored as 'Nominal' (with the name stripped).
    Otherwise 'ME_ONLY_...' names keep their stripped name, purely numeric
    names become 'others_<n>', names containing 'mu' are classified as scale,
    alphaS, nominal-PDF, hessian or plain PDF variations, and anything else
    keeps its stripped name. The number of labels is printed before returning.
    """
    labelled = {}
    if len(values) == 1:
        labelled['Nominal'] = values[0].strip()
        print(len(labelled))
        return labelled

    for weight_name in values:
        bare = weight_name.strip()
        if weight_name.startswith("ME_ONLY_"):
            label = bare
        elif bare.isdigit():
            label = 'others_' + bare
        elif 'mu' not in weight_name.lower():
            label = bare
        else:
            label = scale(weight_name)
            if label != '':
                label = label + '_scale'
            elif 'ASS' in weight_name.split('_')[-1]:
                label = "other_" + bare
            else:
                # No scale tag: classify by the PDF ID found in the name.
                pdf = str(getPDF(weight_name))
                below = str(int(pdf) - 1)
                above = str(int(pdf) + 1)
                if pdf in alpha_S:
                    label = 'PDF' + pdf + '_alphaS'
                elif any('0.5' in val and pdf in val for val in values) and pdf in pdfsets:
                    label = 'Nominal'
                elif any(below in val for val in values) or any(above in val for val in values):
                    p_var = str(int(pdf) % 100)
                    if str(int(pdf) - int(p_var)) in pdfsets:
                        p_nom = str(int(pdf) - int(p_var))
                    else:
                        p_var = str(int(pdf) % 1000)
                        p_nom = str(int(pdf) - int(p_var))
                    if any('0.5' in val and p_nom in val for val in values):
                        label = 'Nominal_' + p_var
                    elif p_nom in Hessian:
                        label = 'PDF' + p_nom + '_' + p_var + '_hessian'
                    else:
                        label = 'PDF' + p_nom + '_' + p_var
                else:
                    label = 'PDF' + pdf
        labelled[label] = weight_name
    print(len(labelled))
    return labelled
543 
544 
def getKey(filename):
    """Return the weight-database keys for the weight-name file *filename*.

    The DSID is the last '_'-separated token of the file's base name (before
    the first '.'). The generator is guessed from substrings of *filename*.

    Returns:
        ['None'] for an empty file, ['nominal'] for a file whose only line is
        'nominal', the keys produced for the detected generator, or [] when
        no generator is recognised.
    """
    dsid = filename.split('/')[-1].split('.')[0].split('_')[-1]
    if file_is_empty(filename):
        return ['None']

    with open(filename) as weight_file:
        values = weight_file.read().splitlines()
    print(len(values))

    if len(values) == 1 and values[0].strip() == 'nominal':
        return ['nominal']
    if 'MadGraph' in filename:
        return getMGkeys(dsid, makePowhegDatabase(dsid, values))
    if 'Sherpa' in filename or "Sh_" in filename:
        return weightData('Sherpa', makeSherpaDatabase(values))
    if 'aMcAtNlo' in filename:
        return weightData('aMcAtNlo', makePowhegDatabase(dsid, values))
    if 'Powheg' in filename or 'phpy' in filename.lower():
        return weightData('Powheg', makePowhegDatabase(dsid, values))
    return []
576 
577 
def buildDatabase(filename):
    """Record the weight-database keys of *filename* in DSID_Database.yaml.

    The DSID is the last '_'-separated token of the file's base name (before
    the first '.'). The database file is created if missing, and the entry
    for the DSID is added or replaced.
    """
    dsid = filename.split('/')[-1].split('.')[0].split('_')[-1]
    key = getKey(filename)
    db_path = "%s/data/DSID_Database.yaml" % SystToolsPath
    if not os.path.exists(db_path):
        makefile(db_path)
    with open(db_path, 'r+') as d_file:
        # safe_load: plain yaml.load() without a Loader is rejected by
        # PyYAML >= 6. `or {}` covers an existing but empty file.
        database = yaml.safe_load(d_file) or {}
        database.update({dsid: key})
        d_file.seek(0)
        yaml.dump(database, d_file, default_flow_style=False)
        # Rewritten in place: cut off leftovers if the new dump is shorter.
        d_file.truncate()
588 
589 
def main(argv):
    """Run buildDatabase() on every weight file named in *argv*.

    Parameters:
        argv: command-line arguments without the program name; at least one
            file path is required.
    """
    parser = argparse.ArgumentParser(description='Weight file needed')
    parser.add_argument('file', type=str, nargs='+')
    # Parse the arguments that were passed in; the original called
    # parse_args() without them and silently fell back to sys.argv.
    args = parser.parse_args(argv)
    for f in args.file:
        buildDatabase(f)


if __name__ == "__main__":
    main(sys.argv[1:])
buildDatabase.scale
def scale(value)
Definition: buildDatabase.py:324
buildDatabase.makeSherpaDatabase
def makeSherpaDatabase(values)
Definition: buildDatabase.py:493
buildDatabase.pyth
def pyth(dictionary, weight_dict, p_nom, nom)
Definition: buildDatabase.py:132
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
max
#define max(a, b)
Definition: cfImp.cxx:41
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
index
Definition: index.py:1
buildDatabase.makeMGDatabase
def makeMGDatabase(values)
Definition: buildDatabase.py:372
buildDatabase.weightData
def weightData(gen, dictionary)
Definition: buildDatabase.py:256
buildDatabase.getMGkeys
def getMGkeys(dsid, dictionary)
Definition: buildDatabase.py:183
buildDatabase.main
def main(argv)
Definition: buildDatabase.py:590
buildDatabase.getKey
def getKey(filename)
Definition: buildDatabase.py:545
buildDatabase.makefile
def makefile(filename)
Definition: buildDatabase.py:39
buildDatabase.nom
def nom(gen, dictionary, weight_dict)
Definition: buildDatabase.py:68
buildDatabase.alternate
def alternate(gen, dictionary, weight_dict, p_nom)
Definition: buildDatabase.py:110
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
buildDatabase.getPDF
def getPDF(ss)
Definition: buildDatabase.py:358
calibdata.exit
exit
Definition: calibdata.py:236
DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename T::value_type > sorted(T begin, T end)
Helper function to create a sorted vector from an unsorted one.
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
buildDatabase.alph
def alph(gen, dictionary, weight_dict, nom_pdf, p_nom)
Definition: buildDatabase.py:92
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
buildDatabase.makePowhegDatabase
def makePowhegDatabase(dsid, values)
Definition: buildDatabase.py:399
Trk::open
@ open
Definition: BinningType.h:40
buildDatabase.hasNumbers
def hasNumbers(inputString)
Definition: buildDatabase.py:48
CaloLCW_tf.group
group
Definition: CaloLCW_tf.py:28
buildDatabase.hes
def hes(gen, dictionary, weight_dict)
Definition: buildDatabase.py:52
str
Definition: BTagTrackIpAccessor.cxx:11
dbg::print
void print(std::FILE *stream, std::format_string< Args... > fmt, Args &&... args)
Definition: SGImplSvc.cxx:70
buildDatabase
Definition: buildDatabase.py:1
buildDatabase.gotoWeightData
def gotoWeightData(weight_dict)
Definition: buildDatabase.py:154
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
buildDatabase.buildDatabase
def buildDatabase(filename)
Definition: buildDatabase.py:578
buildDatabase.file_is_empty
def file_is_empty(path)
Definition: buildDatabase.py:44