3 """! Utility functions for manipulating LHE files.
5 @author James Robinson <james.robinson@cern.ch>
10 from AthenaCommon
import Logging
11 from xml.etree
import ElementTree
18 f =
open(filename,
"rb")
19 if bytes(f.read(2)).hex() ==
'1f8b':
20 return io.TextIOWrapper(gzip.GzipFile(filename))
22 return open(filename,
'r')
26 logger = Logging.logging.getLogger(
"PowhegControl")
29 def merge(input_file_pattern, output_file):
30 """! Merge many input LHE files into a single output file."""
32 input_file_list =
sorted([x
for x
in glob.glob(input_file_pattern)
if not x == output_file])
33 if len(input_file_list) < 1:
34 raise IOError(
"No input LHE files provided. Aborting merge!")
35 logger.info(
"Preparing to create {} from {} input files".
format(output_file, len(input_file_list)))
39 with open(output_file,
"a")
as f_output:
40 logger.info(
"... reading metadata from {}".
format(input_file_list[0]))
42 s_preamble =
preamble(input_file_list[0])
43 s_postamble =
postamble(input_file_list[0])
44 f_output.write(s_preamble)
52 f_output.write(s_postamble)
55 logger.info(
"Wrote {} events to {}".
format(nEvents + 1, output_file))
59 """! Python generator to iterate through events from input LHE files."""
61 if not isinstance(input_files, list):
62 input_files =
sorted(glob.glob(input_files))
65 for file_name
in input_files:
67 logger.info(
"... reading events from {}".
format(file_name))
68 in_event, event_lines =
False,
""
75 line = line[line.index(
"<event"):]
78 if "</event>" in line:
80 in_event, event_lines =
False,
""
84 """! Python generator to iterate through event weights from input LHE files."""
86 yield float([w
for w
in event.splitlines()[1].
split(
" ")
if w][2])
90 """! Count total number of events in input files."""
91 for nEvents, _
in enumerate(
event_iterator(input_file_pattern, verbose=
False)):
97 """! Add a weight to a header passed as input (can be a string or an ElementTree)."""
99 if not isinstance(header, ElementTree.Element):
100 header_elem = ElementTree.fromstring(header)
105 if header_elem.find(
"initrwgt")
is None:
106 header_elem.append(ElementTree.fromstring(
"<initrwgt></initrwgt>"))
109 if weightgroup_name
not in [x.attrib[
"name"]
for x
in header_elem.find(
"initrwgt").findall(
"weightgroup")]:
110 header_elem.find(
"initrwgt").
append(ElementTree.fromstring(
"<weightgroup name='{}' combine='None' ></weightgroup>".
format(weightgroup_name)))
113 weightgroup = [x
for x
in header_elem.find(
"initrwgt").findall(
"weightgroup")
if x.attrib[
"name"] == weightgroup_name][0]
114 if not any([weight.attrib[
"id"] == weight_id
for weight
in weightgroup.findall(
"weight")]):
115 weightgroup.append(ElementTree.fromstring(
"<weight id='{}'>{}</weight>".
format(weight_id, weight_name)))
122 """! Get opening lines from file as a string."""
124 s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
125 s_output = s_input[: s_input.find(b
"<event>")]
126 return "".
join( chr(x)
for x
in s_output)
130 """! Get closing lines from file as a string."""
132 s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
133 s_output = s_input[s_input.rfind(b
"</event>") + 9:]
134 return "".
join( chr(x)
for x
in s_output)
138 """! Get <LesHouchesEvents> opening tag from file as a string."""
139 s_opening =
preamble(input_LHE_file)
140 if s_opening.find(
"<LesHouchesEvents") != -1:
141 return s_opening[s_opening.find(
"<LesHouchesEvents"): s_opening.find(
">") + 1].strip(
"\n")
146 """! Get comment block from file as a string."""
147 s_opening =
preamble(input_LHE_file)
148 if s_opening.find(
"<!--") != -1:
149 return s_opening[s_opening.find(
"<!--"): s_opening.find(
"-->") + 3].strip(
"\n")
154 """! Get <header> block from file as a string."""
155 s_opening =
preamble(input_LHE_file)
156 if s_opening.find(
"<header>") != -1:
157 return s_opening[s_opening.find(
"<header>"): s_opening.find(
"</header>") + 9].strip(
"\n")
158 return "<header>\n</header>"
162 """! Get <init> block from file as a string."""
163 s_opening =
preamble(input_LHE_file)
165 if s_opening.find(
"<init>") != -1:
166 return s_opening[s_opening.find(
"<init>"): s_opening.find(
"</init>") + 7].strip(
"\n")
167 if s_opening.find(
"<init ") != -1:
168 return s_opening[s_opening.find(
"<init "): s_opening.find(
"</init>") + 7].strip(
"\n")
169 return "<init>\n</init>"
173 """! Get first event from file as a string."""
175 s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
177 s_output = s_input[s_input.find(
"<event"): s_input.find(
"</event>") + 8]
178 return "".
join( chr(x)
for x
in s_output)
182 """! Get weight name/value pairs from an input string."""
183 comment_lines = input_event[input_event.find(
"#"):].
replace(
"\n",
" ").
replace(
"</event>",
"")
184 weight_lines = [
" ".
join(line.split())
for line
in comment_lines.split(
"#")
if "new weight,renfact,facfact,pdf1,pdf2" in line]
185 return [(line.split(
" ")[-1], line.split(
" ")[2])
for line
in weight_lines]
189 """! Ensure that all final-state quarks in the event are coloured."""
190 initial_colour_flow, is_event_changed = -1,
False
192 for input_line
in input_event.splitlines(
True):
195 IDUP, ISTUP, MOTHUP0, MOTHUP1, ICOLUP0, ICOLUP1, PUP0, PUP1, PUP2, PUPU3, PUP4, VTIMUP, SPINUP = input_line.split()
196 if int(IDUP) == 21
and int(ISTUP) == -1:
197 initial_colour_flow =
max(initial_colour_flow,
int(ICOLUP0),
int(ICOLUP1))
198 if int(ICOLUP0) == 0
and int(ICOLUP0) == 0:
199 tokens = re.split(
r"(\s+)", input_line)
201 output_line =
"".
join(tokens[:9]) +
" {0:>5d} {1:>5d}".
format(initial_colour_flow + 1, 0) +
"".
join(tokens[13:])
202 is_event_changed =
True
204 output_line =
"".
join(tokens[:9]) +
" {0:>5d} {1:>5d}".
format(0, initial_colour_flow + 1) +
"".
join(tokens[13:])
205 is_event_changed =
True
208 event_lines += output_line
if output_line
is not None else input_line
209 return (is_event_changed, event_lines)
213 Swap out muons for taus, and muon neutrinos for tau neutrinos.
214 Note no momentum reshuffling is done, but Pythia appears to restore the correct tau mass.
216 is_event_changed =
False
218 for input_line
in input_event.splitlines(
True):
221 tokens = re.split(
r"(\s+)", input_line)
222 if len(tokens) < 25:
raise ValueError
223 IDUP =
int(tokens[2])
224 if abs(IDUP) == 13
or abs(IDUP) == 14:
229 is_event_changed =
True
230 output_line =
"".
join(
"".
join(tokens[:2])+
str(IDUP)+
"".
join(tokens[3:]))
234 event_lines += output_line
if output_line
is not None else input_line
235 return (is_event_changed, event_lines)
239 Swap out electrons for taus, and electron neutrinos for tau neutrinos.
240 Note no momentum reshuffling is done, but Pythia appears to restore the correct tau mass.
242 is_event_changed =
False
244 for input_line
in input_event.splitlines(
True):
247 tokens = re.split(
r"(\s+)", input_line)
248 if len(tokens) < 25:
raise ValueError
249 IDUP =
int(tokens[2])
250 if abs(IDUP) == 11
or abs(IDUP) == 12:
255 is_event_changed =
True
256 output_line =
"".
join(
"".
join(tokens[:2])+
str(IDUP)+
"".
join(tokens[3:]))
259 event_lines += output_line
if output_line
is not None else input_line
260 return (is_event_changed, event_lines)
264 Swap out muons for electrons, and muon neutrinos for electron neutrinos.
265 Note no momentum reshuffling is done.
267 is_event_changed =
False
269 for input_line
in input_event.splitlines(
True):
272 tokens = re.split(
r"(\s+)", input_line)
273 if len(tokens) < 25:
raise ValueError
274 IDUP =
int(tokens[2])
275 if abs(IDUP) == 13
or abs(IDUP) == 14:
280 is_event_changed =
True
281 output_line =
"".
join(
"".
join(tokens[:2])+
str(IDUP)+
"".
join(tokens[3:]))
284 event_lines += output_line
if output_line
is not None else input_line
285 return (is_event_changed, event_lines)
289 Swap out electrons for muons, and electron neutrinos for muon neutrinos.
290 Note no momentum reshuffling is done.
292 is_event_changed =
False
294 for input_line
in input_event.splitlines(
True):
297 tokens = re.split(
r"(\s+)", input_line)
298 if len(tokens) < 25:
raise ValueError
299 IDUP =
int(tokens[2])
300 if abs(IDUP) == 11
or abs(IDUP) == 12:
305 is_event_changed =
True
306 output_line =
"".
join(
"".
join(tokens[:2])+
str(IDUP)+
"".
join(tokens[3:]))
309 event_lines += output_line
if output_line
is not None else input_line
310 return (is_event_changed, event_lines)
315 Algorithm specific to gg4l Powheg process, to obtain an inclusive
316 sample starting from the only supported decay mode for ZZ production, 2e2mu.
318 is_event_changed =
False
322 channels_pdgIds[
"2e2mu"] = [11,13]
323 channels_pdgIds[
"2e2tau"] = [11,15]
324 channels_pdgIds[
"2mu2tau"] = [13,15]
325 channels_pdgIds[
"4e"] = [11,11]
326 channels_pdgIds[
"4mu"] = [13,13]
327 channels_pdgIds[
"4tau"] = [15,15]
329 channels = [
"2e2mu",
"2e2tau",
"2mu2tau",
"4e",
"4mu",
"4tau"]
331 probs = np.array([2/9, 2/9, 2/9, 1/9, 1/9, 1/9])
332 cumulative=np.cumsum(probs)
334 for input_line
in input_event.splitlines(
True):
337 tokens = re.split(
r"(\s+)", input_line)
338 if len(tokens) < 25:
raise ValueError
339 IDUP =
int(tokens[2])
342 if not is_event_changed:
343 idx = np.searchsorted(cumulative, np.random.uniform())
344 is_event_changed =
True
346 IDUP = channels_pdgIds[channels[idx]][0]
348 IDUP = -channels_pdgIds[channels[idx]][0]
349 output_line =
"".
join(
"".
join(tokens[:2])+
str(IDUP)+
"".
join(tokens[3:]))
352 if not is_event_changed:
353 idx = np.searchsorted(cumulative, np.random.uniform())
354 is_event_changed =
True
356 IDUP = channels_pdgIds[channels[idx]][1]
358 IDUP = -channels_pdgIds[channels[idx]][1]
359 output_line =
"".
join(
"".
join(tokens[:2])+
str(IDUP)+
"".
join(tokens[3:]))
362 event_lines += output_line
if output_line
is not None else input_line
363 return (is_event_changed, event_lines)
367 """! Ensure that XWGTUP is equal to the reweighted nominal."""
371 for input_line
in input_event.splitlines(
True):
372 if input_line.find(
"<wgt id='0'>") < 0:
376 rwgt_nominal = ElementTree.fromstring(input_line.strip())
379 raise IOError(
"Impossible to understand line with nominal weight from reweighting")
380 for input_line
in input_event.splitlines(
True):
384 NUP, IDPRUP, XWGTUP, SCALUP, AQEDUP, AQCDUP = input_line.split()
385 output_line =
" %s %s %s %s %s %s\n"%(NUP, IDPRUP, rwgt_nominal.text, SCALUP, AQEDUP, AQCDUP)
388 if input_line.find(
"</rwgt>") >= 0:
389 if (wgtid_for_old_XWGTUP_value
is not None):
390 output_line =
"<wgt id='%i'>%s</wgt>\n"%(wgtid_for_old_XWGTUP_value, XWGTUP)
391 output_line += input_line
392 event_lines += output_line
if output_line
is not None else input_line
397 """! Get new-style event weights from an input event string."""
399 if "#new weight,renfact,facfact,pdf1,pdf2" not in input_event:
402 if "<rwgt>" not in input_event:
403 nominal_weight = [x
for x
in input_event.split(
"\n")[1].
split(
" ")
if len(x) > 0][2]
404 input_event = input_event[:input_event.find(
"#")] +
"<rwgt>\n<wgt id='0'> {0} </wgt>\n</rwgt>\n".
format(nominal_weight) + input_event[input_event.find(
"#"):]
405 weight_lines =
"".
join([
"<wgt id='{}'> {} </wgt>\n".
format(name_to_ID[weight[0]], weight[1])
for weight
in string_to_weight(input_event)])
406 return input_event[:input_event.find(
"</rwgt>")] + weight_lines + input_event[input_event.find(
"</rwgt>"):input_event.find(
"#new weight,renfact,facfact,pdf1,pdf2")] +
"</event>\n"
410 """! Re-indent XML so that elements are on their own line."""
412 if not elem.text
or not elem.text.strip():
414 if not elem.tail
or not elem.tail.strip():
418 if not elem.tail
or not elem.tail.strip():
421 if not elem.tail
or not elem.tail.strip():