ATLAS Offline Software
Loading...
Searching...
No Matches
postprocessors/reweighter.py
Go to the documentation of this file.
1# Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
2
3from AthenaCommon import Logging
4from ...decorators import timed
5from ...utility import FileParser
6from ..generators import multicore_untimed, singlecore_untimed
7import collections
8import os
9import shutil
10import re
11
12
13logger = Logging.logging.getLogger("PowhegControl")
14
15
16WeightTuple = collections.namedtuple("WeightTuple", ["parameter_settings", "keywords", "ID", "name", "combine", "group", "parallel_xml_compatible"])
17
18def default_weight_exists_already(powheg_LHE_output):
19 """! Returns True if the LHE file contains a weight called "nominal".
20
21 The weight must be present before the beginning of the first <event> block,
22 otherwise the function returns False.
23
24 @param powheg_LHE_output Name of LHE file produced by PowhegBox.
25 """
26 search_string = "<weight id='0' >default</weight>"
27 with open(powheg_LHE_output, 'r') as lhe_file:
28 for line in lhe_file:
29 if search_string in line:
30 return True
31 if '<event>' in line:
32 # Important to break the loop here with a return, otherwise the
33 # entire (often very large) file is scanned!
34 return False
35
36
37@timed("reweighting")
38def reweighter(process, weight_groups, powheg_LHE_output):
39 """! Add a series of additional weights to an LHE file.
40
41 @param process PowhegBox process.
42 @param weight_groups OrderedDict containing groups of weights to add.
43 @param powheg_LHE_output Name of LHE file produced by PowhegBox.
44 """
45 logger.info("Starting to run PowhegControl PDF and QCD scale reweighter")
46
47 weight_list = []
48
49
50 non_weight_attributes = ["parameter_names", "combination_method", "keywords"]
51
52
53 xml_kwds = {"renscfact": "renscfact", "facscfact": "facscfact", "lhans1": "lhapdf", "lhans2": "lhapdf", "width_correction" : "width_correction", "run_mode": "run_mode"}
54
55 # Initial values for scale, PDF and other weight groups
56 _idx_scale_start, _idx_PDF_start, _idx_other_start = 1001, 2001, 3001
57
58 if not process.use_XML_reweighting:
59 logger.warning("... XML-style reweighting is not enabled for this process. Will use old (multiple-pass) method.")
60
61 # Construct ordered list of weights
62 for group_name, weight_group in weight_groups.items():
63 logger.info("Preparing weight group: {:<19} with {} weights".format(group_name, len(weight_group) - len(non_weight_attributes)))
64
65 # Name -> keyword dictionary is different if this is an XML reweighting
66 is_parallel_xml_compatible = process.use_XML_reweighting and all([k in xml_kwds.keys() for _kw_set in weight_group["keywords"] for k in _kw_set])
67 if is_parallel_xml_compatible:
68 _keywords = [list(set([xml_kwds[k] for k in _kw_set])) for _kw_set in weight_group["keywords"]]
69 else:
70 if process.use_XML_reweighting:
71 logger.warning("... this weight group is incompatible with XML-style reweighting. Will use old (multiple-pass) method.")
72 _keywords = weight_group["keywords"]
73 keyword_dict = dict((n, k) for n, k in zip(weight_group["parameter_names"], _keywords))
74
75 # Common arguments for all weights
76 tuple_kwargs = {"keywords": keyword_dict, "combine": weight_group["combination_method"], "group": group_name, "parallel_xml_compatible": is_parallel_xml_compatible}
77
78 # Nominal variation: ID=0
79 if group_name == "nominal" and not default_weight_exists_already(powheg_LHE_output):
80 weight_list.append(WeightTuple(parameter_settings=weight_group["nominal"], ID=0, name="nominal", **tuple_kwargs))
81
82 # Scale variations: ID=1001+
83 elif group_name == "scale_variation":
84 for idx, name in enumerate([k for k in weight_group.keys() if k not in non_weight_attributes], start=_idx_scale_start):
85 weight_list.append(WeightTuple(parameter_settings=weight_group[name], ID=idx, name=name, **tuple_kwargs))
86 _idx_scale_start = idx + 1
87
88 # PDF variations: ID=2001+
89 elif group_name == "PDF_variation":
90 for idx, name in enumerate([k for k in weight_group.keys() if k not in non_weight_attributes], start=_idx_PDF_start):
91 weight_list.append(WeightTuple(parameter_settings=weight_group[name], ID=idx, name=name, **tuple_kwargs))
92 _idx_PDF_start = idx + 1
93
94 # Other variations: ID=3001+
95 else:
96 for idx, name in enumerate([k for k in weight_group.keys() if k not in non_weight_attributes], start=_idx_other_start):
97 weight_list.append(WeightTuple(parameter_settings=weight_group[name], ID=idx, name=name, **tuple_kwargs))
98 _idx_other_start = idx + 1
99
100 # Make backup of generation statistics
101 if os.path.isfile("pwgcounters.dat"):
102 shutil.copy("pwgcounters.dat", "pwgcounters.dat.bak")
103
104 # ... the Powheg input card
105 try:
106 shutil.copy("powheg.input", "powheg.input.before_reweighting")
107 except IOError:
108 raise IOError("Powheg input card ('powheg.input') cannot be found at the start of reweighting. In a normal setup, PowhegControl generates this input card. Its absence is probably a sign of problems --- please investigate or contact the PowhegControl experts.")
109
110 # ... and also backup unweighted events
111 try:
112 shutil.copy(powheg_LHE_output, "{}.before_reweighting".format(powheg_LHE_output))
113 except IOError:
114 raise IOError("Nominal LHE file could not be found. Probably POWHEG-BOX crashed during event generation.")
115
116 # Do XML-reweighting
117 if process.use_XML_reweighting:
118 # Add nominal weight if not already present
119 if not default_weight_exists_already(powheg_LHE_output) and not any([weight.group == "nominal" for weight in weight_list]):
120 if process.has_parameter("run_mode") and process.parameters_by_keyword("run_mode")[0].value != 1 and hasattr(process, "reweight_for_MiNNLO") and process.reweight_for_MiNNLO:
121 weight_list = [WeightTuple(ID=0, name="nominal", group="nominal", parallel_xml_compatible=True, parameter_settings=[("run_mode",1)], keywords={"run_mode":["run_mode"]}, combine=None)] + weight_list
122 else:
123 weight_list = [WeightTuple(ID=0, name="nominal", group="nominal", parallel_xml_compatible=True, parameter_settings=[], keywords=None, combine=None)] + weight_list
124
125 FileParser("powheg.input").text_replace("pdfreweight .*", "pdfreweight 0")
126 # Construct xml output
127 xml_lines, serial_xml_weight_list, current_weightgroup = [], [], None
128 for weight in weight_list:
129 # Group elements
130 if weight.parallel_xml_compatible and weight.group != current_weightgroup:
131 xml_lines.append("</weightgroup>")
132 current_weightgroup = weight.group
133 xml_lines.append("<weightgroup name='{}' combine='{}'>".format(weight.group, weight.combine))
134 # Weight elements
135 if weight.parallel_xml_compatible:
136 keyword_pairs_str = " ".join(["{}={}".format(k, v) for p, v in weight.parameter_settings for k in weight.keywords[p]])
137 xml_lines.append("<weight id='{}'> {} </weight>".format(weight.ID, keyword_pairs_str))
138 else:
139 serial_xml_weight_list.append(weight)
140 xml_lines.append(xml_lines.pop(0)) # move the closing tag to the end
141 n_parallel_xml_weights = len(weight_list) - len(serial_xml_weight_list)
142
143 # First iterate over the simultaneous XML weights
144 if n_parallel_xml_weights > 0:
145 # Write xml output
146 with open("reweighting_input.xml", "w") as f_rwgt:
147 f_rwgt.write("<initrwgt>\n")
148 [f_rwgt.write("{}\n".format(xml_line)) for xml_line in xml_lines]
149 f_rwgt.write("</initrwgt>")
150
151 # Add reweighting lines to runcard
152 # TODO: add check that the file contains these keywords, otherwise raise an error about
153 # what is probably a faulty process interface!
154 FileParser("powheg.input").text_replace("rwl_file .*", "rwl_file 'reweighting_input.xml'")
155 FileParser("powheg.input").text_replace("rwl_add .*", "rwl_add 1")
156 FileParser("powheg.input").text_replace("clobberlhe .*", "clobberlhe 1")
157 if process.has_parameter("run_mode") and process.parameters_by_keyword("run_mode")[0].value != 1 and hasattr(process, "reweight_for_MiNNLO") and process.reweight_for_MiNNLO:
158 FileParser("powheg.input").text_replace("run_mode .*", "run_mode 1")
159
160 logger.info("Preparing simultaneous calculation of {} additional weights for generated events.".format(n_parallel_xml_weights))
161
162 # Allow RES processes to do their reweighting with manyseeds enabled, or they will look for the wrong files
163 if list(process.parameters_by_name("manyseeds"))[0].value == 1:
164 if process.powheg_version == "RES":
165 os.system('cp reweighting_input.xml backup_of_reweighting_input.xml')
166 os.system('cp powheg.input powheg.input.for_reweighting')
167 multicore_untimed(process)
168 else:
169 FileParser("powheg.input").text_replace("manyseeds .*", "manyseeds 0")
170 FileParser("powheg.input").text_replace("parallelstage .*", "parallelstage -1")
171 os.system('cp reweighting_input.xml backup_of_reweighting_input.xml')
172 os.system('cp powheg.input powheg.input.for_reweighting')
173 singlecore_untimed(process)
174 else:
175 os.system('cp reweighting_input.xml backup_of_reweighting_input.xml')
176 os.system('cp powheg.input powheg.input.for_reweighting')
177 singlecore_untimed(process)
178
179 # Move the reweighted file back
180 rename_LHE_output(powheg_LHE_output)
181
182 # Iterate over any variations which require non-simultaneous reweighting
183 if len(serial_xml_weight_list) > 0:
184 logger.info("Preparing individual calculation of {} additional weights for generated events.".format(len(serial_xml_weight_list)))
185 shutil.move("reweighting_input.xml", "reweighting_input.nominal")
186 for idx_weight, weight in enumerate(serial_xml_weight_list, start=1):
187 add_single_weight(process, weight, idx_weight, len(serial_xml_weight_list), use_XML=True)
188 rename_LHE_output(powheg_LHE_output)
189 shutil.move("reweighting_input.nominal", "reweighting_input.xml")
190
191 # Do non-XML-reweighting
192 else:
193 # Iterate over any variations which require old-style reweighting
194 logger.info("Preparing individual calculation of {} additional weights for generated events.".format(len(weight_list)))
195 for idx_weight, weight in enumerate(weight_list, start=1):
196 add_single_weight(process, weight, idx_weight, len(weight_list), use_XML=False)
197 rename_LHE_output(powheg_LHE_output)
198
199 if process.use_XML_reweighting:
200 comment_patterns = ["#pdf", "#rwgt", "#new weight", "#matching", " #Random" ]
201 if process.remove_oldStyle_rwt_comments:
202
203 logger.info("Removing comment lines from lhe file - these lines can be added back using the 'remove_oldStyle_rwt_comments=False' argument in generate()")
204 # Remove comment lines, which may cause a crash in Pythia
205 for pattern in comment_patterns:
206 removed = FileParser(powheg_LHE_output).text_remove("^"+pattern)
207 logger.info("{} line(s) starting with '{}' were removed from {}".format(removed, pattern, powheg_LHE_output))
208 else:
209 logger.info("Fixing comment lines from lhe file - these lines can be simply removed using the 'remove_oldStyle_rwt_comments=True' argument in generate()")
210 for pattern in comment_patterns: # no whitespace needed in patterns here
211 repair_comment_lines(powheg_LHE_output, pattern) # the last pattern starts with a space
212
213 replacelist = []
214 # Rename all weights
215 for weight in weight_list:
216 replacelist += [[".* id='{}' .*".format(weight.ID), "<weight id='{weight_id}'>{weight_name}</weight>".format(weight_id=weight.ID, weight_name=weight.name), 1]]
217
218 # Correct LHE version identification; otherwise Pythia will treat all files as v1
219 replacelist += [['LesHouchesEvents version="1.0"', 'LesHouchesEvents version="3.0"', 1]]
220 FileParser(powheg_LHE_output).text_replace_multi(replacelist)
221
222 # Restore generation statistics and initial runcard
223 shutil.move("powheg_nominal.input", "powheg.input")
224 if os.path.isfile("pwgcounters.dat.bak"):
225 shutil.move("pwgcounters.dat.bak", "pwgcounters.dat")
226
227def add_single_weight(process, weight, idx_weight, n_total, use_XML):
228 """! Add a single additional weight to an LHE file.
229
230 @param process PowhegBox process.
231 @param weight Tuple with details of the weight to be added.
232 @param idx_weight Which number this weight is.
233 @param n_total Total number of weights being added.
234 @param use_XML Whether to use XML or old-style reweighting.
235 """
236 @timed("weight variation {}/{}".format(idx_weight, n_total))
237 def __timed_inner_fn():
238 logger.info("... weight name is: {}".format(weight.name))
239 logger.info("... weight index ID is: {}".format(weight.ID))
240 shutil.copy("powheg_nominal.input", "powheg.input")
241
242 # Make appropriate runcard changes
243 if use_XML:
244 FileParser("powheg.input").text_replace("rwl_file .*", "rwl_file 'reweighting_input.xml'")
245 FileParser("powheg.input").text_replace("rwl_add .*", "rwl_add 1")
246 FileParser("powheg.input").text_replace("clobberlhe .*", "clobberlhe 1")
247 FileParser("powheg.input").text_replace("pdfreweight .*", "pdfreweight 0")
248 else:
249 # As the nominal process has already been run, turn on compute_rwgt
250 FileParser("powheg.input").text_replace("compute_rwgt 0", "compute_rwgt 1")
251 # Ensure that manyseeds/parallelstage are turned off, as these cause the reweighting to crash
252 FileParser("powheg.input").text_replace("manyseeds .*", "manyseeds 0")
253 FileParser("powheg.input").text_replace("parallelstage .*", "parallelstage -1")
254
255 # Apply weight settings
256 for (parameter, value) in weight.parameter_settings:
257 try:
258 for keyword in weight.keywords[parameter]:
259 FileParser("powheg.input").text_replace("^{} .*".format(keyword), "{} {}".format(keyword, value))
260 logger.info("... setting {} to {}".format(parameter, value))
261 except KeyError:
262 logger.warning("Parameter '{}' not recognised. Cannot reweight!".format(parameter))
263
264 if use_XML:
265 # Create XML reweighting file
266 with open("reweighting_input.xml", "w") as f_rwgt:
267 f_rwgt.write("<initrwgt>\n")
268 f_rwgt.write("<weightgroup name='{}' combine='{}'>\n".format(weight.group, weight.combine))
269 f_rwgt.write("<weight id='{}'> </weight>\n".format(weight.ID))
270 f_rwgt.write("</weightgroup>\n")
271 f_rwgt.write("</initrwgt>")
272 else:
273 # Change reweighting metadata in runcard
274 FileParser("powheg.input").text_replace("lhrwgt_descr .*", "lhrwgt_descr '{}'".format(weight.name))
275 FileParser("powheg.input").text_replace("lhrwgt_id .*", "lhrwgt_id '{}'".format(weight.ID))
276 FileParser("powheg.input").text_replace("lhrwgt_group_combine .*", "lhrwgt_group_combine '{}'".format(weight.combine))
277 FileParser("powheg.input").text_replace("lhrwgt_group_name .*", "lhrwgt_group_name '{}'".format(weight.group))
278
279 # Run the process until termination
280 if list(process.parameters_by_name("manyseeds"))[0].value == 1:
281 if process.powheg_version == "RES":
282 multicore_untimed(process)
283 else:
284 FileParser("powheg.input").text_replace("manyseeds .*", "manyseeds 0")
285 FileParser("powheg.input").text_replace("parallelstage .*", "parallelstage -1")
286 singlecore_untimed(process)
287 else:
288 singlecore_untimed(process)
289
290 # Run single weight variation
291 __timed_inner_fn()
292
293
294def rename_LHE_output(powheg_LHE_output):
295 reweighted_events_file_name = powheg_LHE_output.replace(".lhe", "-rwgt.lhe")
296 try:
297 shutil.move(reweighted_events_file_name, powheg_LHE_output)
298 except IOError:
299 raise IOError("Reweighted LHE file '{filename}' could not be found. Probably POWHEG-BOX crashed during reweighting.".format(filename=reweighted_events_file_name))
300
301
302def repair_comment_lines(lheFile, pattern):
303 if pattern is None:
304 return
305 if not os.path.isfile("{}.before_reweighting".format(lheFile)):
306 logger.error("Impossible to find file {}.before_reweighting".format(lheFile))
307 raise IOError
308
309 # create backup file
310 shutil.move(lheFile, "{}.text_replace_backup".format(lheFile))
311
312 n_found = 0
313 n_events = 0
314 with open("{}.text_replace_backup".format(lheFile), "rb") as f_input:
315 line_in = f_input.readline()
316 while line_in:
317 if re.search("^"+pattern.lstrip(), line_in.decode().lstrip()):
318 n_found += 1
319 elif re.search("^</event>", line_in.decode().lstrip()):
320 n_events += 1
321 line_in = f_input.readline()
322
323 n_found_noWeights = 0
324 n_events_noWeights = 0
325 with open("{}.before_reweighting".format(lheFile), "rb") as f_input:
326 line_in = f_input.readline()
327 while line_in:
328 if re.search("^"+pattern.lstrip(), line_in.decode().lstrip()):
329 n_found_noWeights += 1
330 elif re.search("^</event>", line_in.decode().lstrip()):
331 n_events_noWeights += 1
332 line_in = f_input.readline()
333
334 # in case anything turns bad, will give up fixing
335 impossible_to_fix = False
336
337 # initialise counters to 0
338 n_replaced = 0
339 n_added_back = 0
340 with open(lheFile, "w") as f_output:
341 # first strategy: loop over the file with weights, and replace the relevant lines from the files without weights
342 if n_found == n_found_noWeights:
343 # loop in parallel on the lhe file with weights that we want to fix, and on the lhe file without weights from which we'll take the correct comment lines
344 with open("{}.text_replace_backup".format(lheFile), "rb") as f_input, open("{}.before_reweighting".format(lheFile), "rb") as f_input_noWeights:
345 line_in = f_input.readline()
346 line_in_noWeights = f_input_noWeights.readline()
347 while line_in: # loop on the lines of the output file with weights
348 if re.search("^"+pattern.lstrip(), line_in.decode().lstrip()): # found pattern, this line may need to be replaced (ignoring leading whitespaces)
349 processed = False
350 while line_in_noWeights: # loop on the next lines in the other files
351 if re.search("^"+pattern, line_in_noWeights.decode().lstrip()): # found pattern, using this line as replacement (ignoring leading whitespaces)
352 if (line_in.decode().rstrip().lstrip() != line_in_noWeights.decode().rstrip().lstrip()):
353 f_output.write(line_in_noWeights.decode())
354 n_replaced += 1
355 else:
356 f_output.write(line_in.decode())
357 line_in_noWeights = f_input_noWeights.readline()
358 processed = True
359 break # end loop over the other file for the moment
360 else: # pattern not found in the other file, go to next line
361 line_in_noWeights = f_input_noWeights.readline() # keep trying
362 if processed: # if this line has been processed, no need to do the next line
363 line_in = f_input.readline() # next line in output file
364 else : # line hasn't been processed, it means the other file doesn't have enough appropriate lines
365 impossible_to_fix = True
366 break # end loop over patterns, giving up fixing
367 if impossible_to_fix: # it's pointless to continue
368 break
369 # no pattern not found, we keep the line as-is and go to the next
370 f_output.write(line_in.decode())
371 line_in = f_input.readline()
372
373 # this is a cross-check - both file should have the same number of comment lines
374 while line_in_noWeights: # finish processing the other file - we shouldn't find any more lines with pattern
375 if re.search("^"+pattern, line_in_noWeights.decode()): # found pattern, something is wrong
376 impossible_to_fix = True
377 break
378 if impossible_to_fix: # it's pointless to continue
379 break
380 line_in_noWeights = f_input_noWeights.readline()
381 # alternative strategy: loop over file without weights, and add the comments from it just before the end of event
382 elif n_found == 0 and n_found_noWeights == n_events_noWeights and n_events == n_events_noWeights:
383 # loop in parallel on the lhe file with weights that we want to fix, and on the lhe file without weights from which we'll take the correct comment lines
384 with open("{}.text_replace_backup".format(lheFile), "rb") as f_input, open("{}.before_reweighting".format(lheFile), "rb") as f_input_noWeights:
385 line_in = f_input.readline()
386 line_in_noWeights = f_input_noWeights.readline()
387 while line_in_noWeights:
388 if re.search("^"+pattern, line_in_noWeights.decode().lstrip()):
389 while line_in:
390 if re.search("^</event>", line_in.decode().lstrip()):
391 f_output.write(line_in_noWeights.decode())
392 f_output.write(line_in.decode())
393 n_added_back += 1
394 line_in = f_input.readline()
395 break
396 else:
397 f_output.write(line_in.decode())
398 line_in = f_input.readline()
399 line_in_noWeights = f_input_noWeights.readline()
400 while line_in:
401 f_output.write(line_in.decode())
402 line_in = f_input.readline()
403
404 # processing of files ended, now handling the outcome
405 if n_found == 0 and n_found_noWeights == 0: # no line found with this pattern in the file to be fixed
406 shutil.move("{}.text_replace_backup".format(lheFile), lheFile)
407 logger.info("No line with pattern '{}' was found in lhe file before or after reweighting, so need to fix it".format(pattern))
408 elif impossible_to_fix: # we couldn't fix it, so we get the backup copy back, and remove the problematic lines
409 shutil.move("{}.text_replace_backup".format(lheFile), lheFile)
410 logger.info("Impossible to fix the possibly buggy comment lines with pattern {} in {} using the corresponding lines from {}.before_reweighting".format(pattern, lheFile, lheFile))
411 logger.info("Keeping those lines as they are in '{}".format(lheFile))
412 else:
413 os.remove("{}.text_replace_backup".format(lheFile))
414 if n_replaced != 0:
415 logger.info("{} line(s) starting with '{}' replaced in {} using the corresponding line(s) from {}.before_reweighting".format(n_replaced, pattern, lheFile, lheFile))
416 elif n_found != 0 and n_replaced == 0:
417 logger.info("No line starting with '{}' was replaced in {} ({} were found) since none seems buggy".format(pattern, lheFile, n_found))
418 elif n_added_back !=0:
419 logger.info("{} line(s) starting with '{}' added back in {} using the corresponding line(s) from {}.before_reweighting".format(n_added_back, pattern, lheFile, lheFile))
Utility class to perform simple operations on files.
STL class.
WeightTuple
Tuple to contain arbitrary grouped weights.
add_single_weight(process, weight, idx_weight, n_total, use_XML)
Add a single additional weight to an LHE file.
default_weight_exists_already(powheg_LHE_output)
Returns True if the LHE file contains a weight called "nominal".