ATLAS Offline Software
Loading...
Searching...
No Matches
LHE.py
Go to the documentation of this file.
1# Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
2
3"""! Utility functions for manipulating LHE files.
4
5@author James Robinson <james.robinson@cern.ch>
6"""
7import re
8import glob
9import mmap
10from AthenaCommon import Logging
11from xml.etree import ElementTree
12import gzip
13import io
14import numpy as np
15
16
17def _open_file(filename):
18 f = open(filename,"rb")
19 if bytes(f.read(2)).hex() == '1f8b':
20 return io.TextIOWrapper(gzip.GzipFile(filename))
21 else:
22 return open(filename,'r')
23
24
25
26logger = Logging.logging.getLogger("PowhegControl")
27
28
29def merge(input_file_pattern, output_file):
30 """! Merge many input LHE files into a single output file."""
31 # Create input file list from pattern, ensuring that output_file is not in the list
32 input_file_list = sorted([x for x in glob.glob(input_file_pattern) if not x == output_file])
33 if len(input_file_list) < 1:
34 raise IOError("No input LHE files provided. Aborting merge!")
35 logger.info("Preparing to create {} from {} input files".format(output_file, len(input_file_list)))
36
37 # Open output file and write events
38 nEvents = 0
39 with open(output_file, "a") as f_output:
40 logger.info("... reading metadata from {}".format(input_file_list[0]))
41 # Start with the first file and extract opening/closing string
42 s_preamble = preamble(input_file_list[0])
43 s_postamble = postamble(input_file_list[0])
44 f_output.write(s_preamble)
45
46 # Now write events from all files to output
47 # Use sanitised list in case output_file matches the pattern
48 for nEvents, event in enumerate(event_iterator(input_file_list)):
49 f_output.write(event)
50
51 # Finally add the footer
52 f_output.write(s_postamble)
53
54 # Write the event count to the logger
55 logger.info("Wrote {} events to {}".format(nEvents + 1, output_file))
56
57
58def event_iterator(input_files, verbose=True):
59 """! Python generator to iterate through events from input LHE files."""
60 # If a glob-able pattern is provided, expand this into a list
61 if not isinstance(input_files, list):
62 input_files = sorted(glob.glob(input_files))
63
64 # Iterate over input files
65 for file_name in input_files:
66 if verbose:
67 logger.info("... reading events from {}".format(file_name))
68 in_event, event_lines = False, ""
69 # Group all lines inside an XML event element
70 with _open_file(file_name) as f_input:
71 for line in f_input:
72 # Both <event ...> and <event> are permitted
73 if "<event" in line:
74 in_event = True
75 line = line[line.index("<event"):] # catch cases like "</init><event>"
76 if in_event:
77 event_lines += line
78 if "</event>" in line:
79 yield event_lines
80 in_event, event_lines = False, ""
81
82
83def event_weight_iterator(input_files):
84 """! Python generator to iterate through event weights from input LHE files."""
85 for event in event_iterator(input_files):
86 yield float([w for w in event.splitlines()[1].split(" ") if w][2])
87
88
89def event_counter(input_file_pattern):
90 """! Count total number of events in input files."""
91 for nEvents, _ in enumerate(event_iterator(input_file_pattern, verbose=False)):
92 pass
93 return nEvents + 1
94
95
96def add_weight_to_header(header, weightgroup_name, weight_name, weight_id):
97 """! Add a weight to a header passed as input (can be a string or an ElementTree)."""
98 # Convert string to ElementTree
99 if not isinstance(header, ElementTree.Element):
100 header_elem = ElementTree.fromstring(header)
101 else:
102 header_elem = header
103
104 # Add initrwgt element if it doesn't exist
105 if header_elem.find("initrwgt") is None:
106 header_elem.append(ElementTree.fromstring("<initrwgt></initrwgt>"))
107
108 # Add weightgroup element if it doesn't exist
109 if weightgroup_name not in [x.attrib["name"] for x in header_elem.find("initrwgt").findall("weightgroup")]:
110 header_elem.find("initrwgt").append(ElementTree.fromstring("<weightgroup name='{}' combine='None' ></weightgroup>".format(weightgroup_name)))
111
112 # Add weight to appropriate weightgroup
113 weightgroup = [x for x in header_elem.find("initrwgt").findall("weightgroup") if x.attrib["name"] == weightgroup_name][0]
114 if not any([weight.attrib["id"] == weight_id for weight in weightgroup.findall("weight")]):
115 weightgroup.append(ElementTree.fromstring("<weight id='{}'>{}</weight>".format(weight_id, weight_name)))
116
117 reindent_XML(header_elem)
118 return header_elem
119
120
121def preamble(input_LHE_file):
122 """! Get opening lines from file as a string."""
123 with _open_file(input_LHE_file) as f_input:
124 s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
125 s_output = s_input[: s_input.find(b"<event>")]
126 return "".join( chr(x) for x in s_output)
127
128
129def postamble(input_LHE_file):
130 """! Get closing lines from file as a string."""
131 with _open_file(input_LHE_file) as f_input:
132 s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
133 s_output = s_input[s_input.rfind(b"</event>") + 9:]
134 return "".join( chr(x) for x in s_output)
135
136
137def opening_tag(input_LHE_file):
138 """! Get <LesHouchesEvents> opening tag from file as a string."""
139 s_opening = preamble(input_LHE_file)
140 if s_opening.find("<LesHouchesEvents") != -1:
141 return s_opening[s_opening.find("<LesHouchesEvents"): s_opening.find(">") + 1].strip("\n")
142 return ""
143
144
145def comment_block(input_LHE_file):
146 """! Get comment block from file as a string."""
147 s_opening = preamble(input_LHE_file)
148 if s_opening.find("<!--") != -1:
149 return s_opening[s_opening.find("<!--"): s_opening.find("-->") + 3].strip("\n")
150 return ""
151
152
153def header_block(input_LHE_file):
154 """! Get <header> block from file as a string."""
155 s_opening = preamble(input_LHE_file)
156 if s_opening.find("<header>") != -1:
157 return s_opening[s_opening.find("<header>"): s_opening.find("</header>") + 9].strip("\n")
158 return "<header>\n</header>"
159
160
161def init_block(input_LHE_file):
162 """! Get <init> block from file as a string."""
163 s_opening = preamble(input_LHE_file)
164 # Both <init ...> and <init> are permitted
165 if s_opening.find("<init>") != -1:
166 return s_opening[s_opening.find("<init>"): s_opening.find("</init>") + 7].strip("\n")
167 if s_opening.find("<init ") != -1:
168 return s_opening[s_opening.find("<init "): s_opening.find("</init>") + 7].strip("\n")
169 return "<init>\n</init>"
170
171
172def get_first_event(input_LHE_file):
173 """! Get first event from file as a string."""
174 with _open_file(input_LHE_file) as f_input:
175 s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
176 # Both <event ...> and <event> are permitted
177 s_output = s_input[s_input.find("<event"): s_input.find("</event>") + 8]
178 return "".join( chr(x) for x in s_output)
179
180
181def string_to_weight(input_event):
182 """! Get weight name/value pairs from an input string."""
183 comment_lines = input_event[input_event.find("#"):].replace("\n", " ").replace("</event>", "")
184 weight_lines = [" ".join(line.split()) for line in comment_lines.split("#") if "new weight,renfact,facfact,pdf1,pdf2" in line]
185 return [(line.split(" ")[-1], line.split(" ")[2]) for line in weight_lines]
186
187
188def ensure_coloured_quarks(input_event):
189 """! Ensure that all final-state quarks in the event are coloured."""
190 initial_colour_flow, is_event_changed = -1, False
191 event_lines = ""
192 for input_line in input_event.splitlines(True):
193 output_line = None
194 try: # interpret line as a particle
195 IDUP, ISTUP, MOTHUP0, MOTHUP1, ICOLUP0, ICOLUP1, PUP0, PUP1, PUP2, PUPU3, PUP4, VTIMUP, SPINUP = input_line.split()
196 if int(IDUP) == 21 and int(ISTUP) == -1: # this is an initial state gluon
197 initial_colour_flow = max(initial_colour_flow, int(ICOLUP0), int(ICOLUP1))
198 if int(ICOLUP0) == 0 and int(ICOLUP0) == 0: # this is a colourless particle
199 tokens = re.split(r"(\s+)", input_line)
200 if int(IDUP) in range(1, 7): # this is a quark
201 output_line = "".join(tokens[:9]) + " {0:>5d} {1:>5d}".format(initial_colour_flow + 1, 0) + "".join(tokens[13:])
202 is_event_changed = True
203 if int(IDUP) in range(-6, 0): # this is an anti-quark
204 output_line = "".join(tokens[:9]) + " {0:>5d} {1:>5d}".format(0, initial_colour_flow + 1) + "".join(tokens[13:])
205 is_event_changed = True
206 except ValueError: # this is not a particle line
207 pass
208 event_lines += output_line if output_line is not None else input_line
209 return (is_event_changed, event_lines)
210
211def mu2tau(input_event):
212 """!
213 Swap out muons for taus, and muon neutrinos for tau neutrinos.
214 Note no momentum reshuffling is done, but Pythia appears to restore the correct tau mass.
215 """
216 is_event_changed = False
217 event_lines = ""
218 for input_line in input_event.splitlines(True):
219 output_line = None
220 try: # interpret line as a particle
221 tokens = re.split(r"(\s+)", input_line)
222 if len(tokens) < 25: raise ValueError
223 IDUP = int(tokens[2])
224 if abs(IDUP) == 13 or abs(IDUP) == 14: # this is a muon or muon neutrino
225 if IDUP > 0:
226 IDUP += 2
227 else:
228 IDUP -= 2
229 is_event_changed = True
230 output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
231 # print "LHE mu2tau swap \n\'"+input_line+"\', for \n\'"+output_line+"\'"
232 except ValueError: # this is not a particle line
233 pass
234 event_lines += output_line if output_line is not None else input_line
235 return (is_event_changed, event_lines)
236
237def e2tau(input_event):
238 """!
239 Swap out electrons for taus, and electron neutrinos for tau neutrinos.
240 Note no momentum reshuffling is done, but Pythia appears to restore the correct tau mass.
241 """
242 is_event_changed = False
243 event_lines = ""
244 for input_line in input_event.splitlines(True):
245 output_line = None
246 try: # interpret line as a particle
247 tokens = re.split(r"(\s+)", input_line)
248 if len(tokens) < 25: raise ValueError
249 IDUP = int(tokens[2])
250 if abs(IDUP) == 11 or abs(IDUP) == 12: # this is a electron or electron neutrino
251 if IDUP > 0:
252 IDUP += 4
253 else:
254 IDUP -= 4
255 is_event_changed = True
256 output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
257 except ValueError: # this is not a particle line
258 pass
259 event_lines += output_line if output_line is not None else input_line
260 return (is_event_changed, event_lines)
261
262def mu2e(input_event):
263 """!
264 Swap out muons for electrons, and muon neutrinos for electron neutrinos.
265 Note no momentum reshuffling is done.
266 """
267 is_event_changed = False
268 event_lines = ""
269 for input_line in input_event.splitlines(True):
270 output_line = None
271 try: # interpret line as a particle
272 tokens = re.split(r"(\s+)", input_line)
273 if len(tokens) < 25: raise ValueError
274 IDUP = int(tokens[2])
275 if abs(IDUP) == 13 or abs(IDUP) == 14: # this is a muon or muon neutrino
276 if IDUP > 0:
277 IDUP -= 2
278 else:
279 IDUP += 2
280 is_event_changed = True
281 output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
282 except ValueError: # this is not a particle line
283 pass
284 event_lines += output_line if output_line is not None else input_line
285 return (is_event_changed, event_lines)
286
287def e2mu(input_event):
288 """!
289 Swap out electrons for muons, and electron neutrinos for muon neutrinos.
290 Note no momentum reshuffling is done.
291 """
292 is_event_changed = False
293 event_lines = ""
294 for input_line in input_event.splitlines(True):
295 output_line = None
296 try: # interpret line as a particle
297 tokens = re.split(r"(\s+)", input_line)
298 if len(tokens) < 25: raise ValueError
299 IDUP = int(tokens[2])
300 if abs(IDUP) == 11 or abs(IDUP) == 12: # this is an electron or electron neutrino
301 if IDUP > 0:
302 IDUP += 2
303 else:
304 IDUP -= 2
305 is_event_changed = True
306 output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
307 except ValueError: # this is not a particle line
308 pass
309 event_lines += output_line if output_line is not None else input_line
310 return (is_event_changed, event_lines)
311
312
313def gg4l_emu2all(input_event):
314 """!
315 Algorithm specific to gg4l Powheg process, to obtain an inclusive
316 sample starting from the only supported decay mode for ZZ production, 2e2mu.
317 """
318 is_event_changed = False
319 event_lines = ""
320
321 channels_pdgIds = {}
322 channels_pdgIds["2e2mu"] = [11,13]
323 channels_pdgIds["2e2tau"] = [11,15]
324 channels_pdgIds["2mu2tau"] = [13,15]
325 channels_pdgIds["4e"] = [11,11]
326 channels_pdgIds["4mu"] = [13,13]
327 channels_pdgIds["4tau"] = [15,15]
328
329 channels = ["2e2mu", "2e2tau", "2mu2tau", "4e", "4mu", "4tau"]
330 #probs = [xsec 2l2l', .., .., xsec 4l, .., ..]
331 probs = np.array([2/9, 2/9, 2/9, 1/9, 1/9, 1/9])
332 cumulative=np.cumsum(probs)
333
334 for input_line in input_event.splitlines(True):
335 output_line = None
336 try: # interpret line as a particle
337 tokens = re.split(r"(\s+)", input_line)
338 if len(tokens) < 25: raise ValueError
339 IDUP = int(tokens[2])
340
341 if abs(IDUP) == 11: #this is the electron part
342 if not is_event_changed:
343 idx = np.searchsorted(cumulative, np.random.uniform())
344 is_event_changed = True
345 if IDUP > 0:
346 IDUP = channels_pdgIds[channels[idx]][0]
347 else:
348 IDUP = -channels_pdgIds[channels[idx]][0]
349 output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
350
351 if abs(IDUP) == 13: #this is the muon part
352 if not is_event_changed:
353 idx = np.searchsorted(cumulative, np.random.uniform())
354 is_event_changed = True
355 if IDUP > 0:
356 IDUP = channels_pdgIds[channels[idx]][1]
357 else:
358 IDUP = -channels_pdgIds[channels[idx]][1]
359 output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
360 except ValueError: # this is not a particle line
361 pass
362 event_lines += output_line if output_line is not None else input_line
363 return (is_event_changed, event_lines)
364
365
366def update_XWGTUP_with_reweighted_nominal(input_event, wgtid_for_old_XWGTUP_value = None):
367 """! Ensure that XWGTUP is equal to the reweighted nominal."""
368 event_lines = ""
369 rwgt_nominal = None
370 XWGTUP = None
371 for input_line in input_event.splitlines(True): # loop first to fine reweighted nominal
372 if input_line.find("<wgt id=\'0\'>") < 0:
373 continue
374 else:
375 try:
376 rwgt_nominal = ElementTree.fromstring(input_line.strip())
377 break # no need to continue the loop
378 except Exception:
379 raise IOError("Impossible to understand line with nominal weight from reweighting")
380 for input_line in input_event.splitlines(True):
381 output_line = None
382 if XWGTUP is None: # XWGTUP not yet found
383 try: # interpret line as a general event info line
384 NUP, IDPRUP, XWGTUP, SCALUP, AQEDUP, AQCDUP = input_line.split()
385 output_line = " %s %s %s %s %s %s\n"%(NUP, IDPRUP, rwgt_nominal.text, SCALUP, AQEDUP, AQCDUP)
386 except ValueError: # this is not a general event info line
387 pass
388 if input_line.find("</rwgt>") >= 0:
389 if (wgtid_for_old_XWGTUP_value is not None):# in this case, add the original value of XWGTUP as last weight
390 output_line = "<wgt id='%i'>%s</wgt>\n"%(wgtid_for_old_XWGTUP_value, XWGTUP)
391 output_line += input_line
392 event_lines += output_line if output_line is not None else input_line
393 return (event_lines)
394
395
396def Powheg2LHEv3(input_event, name_to_ID):
397 """! Get new-style event weights from an input event string."""
398 # Return event as-is if there are no Powheg-style weights
399 if "#new weight,renfact,facfact,pdf1,pdf2" not in input_event:
400 return input_event
401 # Otherwise convert these to LHEv3 weights
402 if "<rwgt>" not in input_event:
403 nominal_weight = [x for x in input_event.split("\n")[1].split(" ") if len(x) > 0][2]
404 input_event = input_event[:input_event.find("#")] + "<rwgt>\n<wgt id='0'> {0} </wgt>\n</rwgt>\n".format(nominal_weight) + input_event[input_event.find("#"):]
405 weight_lines = "".join(["<wgt id='{}'> {} </wgt>\n".format(name_to_ID[weight[0]], weight[1]) for weight in string_to_weight(input_event)])
406 return input_event[:input_event.find("</rwgt>")] + weight_lines + input_event[input_event.find("</rwgt>"):input_event.find("#new weight,renfact,facfact,pdf1,pdf2")] + "</event>\n"
407
408
409def reindent_XML(elem):
410 """! Re-indent XML so that elements are on their own line."""
411 if len(elem):
412 if not elem.text or not elem.text.strip():
413 elem.text = "\n"
414 if not elem.tail or not elem.tail.strip():
415 elem.tail = "\n"
416 for subelem in elem:
417 reindent_XML(subelem)
418 if not elem.tail or not elem.tail.strip():
419 elem.tail = "\n"
420 else:
421 if not elem.tail or not elem.tail.strip():
422 elem.tail = "\n"
#define max(a, b)
Definition cfImp.cxx:41
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:310
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
Definition merge.py:1
e2tau(input_event)
Swap out electrons for taus, and electron neutrinos for tau neutrinos.
Definition LHE.py:237
ensure_coloured_quarks(input_event)
Ensure that all final-state quarks in the event are coloured.
Definition LHE.py:188
event_counter(input_file_pattern)
Count total number of events in input files.
Definition LHE.py:89
comment_block(input_LHE_file)
Get comment block from file as a string.
Definition LHE.py:145
event_weight_iterator(input_files)
Python generator to iterate through event weights from input LHE files.
Definition LHE.py:83
get_first_event(input_LHE_file)
Get first event from file as a string.
Definition LHE.py:172
reindent_XML(elem)
Re-indent XML so that elements are on their own line.
Definition LHE.py:409
mu2e(input_event)
Swap out muons for electrons, and muon neutrinos for electron neutrinos.
Definition LHE.py:262
update_XWGTUP_with_reweighted_nominal(input_event, wgtid_for_old_XWGTUP_value=None)
Ensure that XWGTUP is equal to the reweighted nominal.
Definition LHE.py:366
gg4l_emu2all(input_event)
Algorithm specific to gg4l Powheg process, to obtain an inclusive sample starting from the only suppo...
Definition LHE.py:313
init_block(input_LHE_file)
Get <init> block from file as a string.
Definition LHE.py:161
Powheg2LHEv3(input_event, name_to_ID)
Get new-style event weights from an input event string.
Definition LHE.py:396
event_iterator(input_files, verbose=True)
Python generator to iterate through events from input LHE files.
Definition LHE.py:58
mu2tau(input_event)
Swap out muons for taus, and muon neutrinos for tau neutrinos.
Definition LHE.py:211
opening_tag(input_LHE_file)
Get <LesHouchesEvents> opening tag from file as a string.
Definition LHE.py:137
add_weight_to_header(header, weightgroup_name, weight_name, weight_id)
Add a weight to a header passed as input (can be a string or an ElementTree).
Definition LHE.py:96
e2mu(input_event)
Swap out electrons for muons, and electron neutrinos for muon neutrinos.
Definition LHE.py:287
_open_file(filename)
Definition LHE.py:17
header_block(input_LHE_file)
Get <header> block from file as a string.
Definition LHE.py:153
string_to_weight(input_event)
Get weight name/value pairs from an input string.
Definition LHE.py:181
preamble(input_LHE_file)
Get opening lines from file as a string.
Definition LHE.py:121
postamble(input_LHE_file)
Get closing lines from file as a string.
Definition LHE.py:129