ATLAS Offline Software
LHE.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
2 
3 """! Utility functions for manipulating LHE files.
4 
5 @author James Robinson <james.robinson@cern.ch>
6 """
7 import re
8 import glob
9 import mmap
10 from AthenaCommon import Logging
11 from xml.etree import ElementTree
12 import gzip
13 import io
14 import numpy as np
15 
16 
17 def _open_file(filename):
18  f = open(filename,"rb")
19  if bytes(f.read(2)).hex() == '1f8b':
20  return io.TextIOWrapper(gzip.GzipFile(filename))
21  else:
22  return open(filename,'r')
23 
24 
25 
26 logger = Logging.logging.getLogger("PowhegControl")
27 
28 
29 def merge(input_file_pattern, output_file):
30  """! Merge many input LHE files into a single output file."""
31  # Create input file list from pattern, ensuring that output_file is not in the list
32  input_file_list = sorted([x for x in glob.glob(input_file_pattern) if not x == output_file])
33  if len(input_file_list) < 1:
34  raise IOError("No input LHE files provided. Aborting merge!")
35  logger.info("Preparing to create {} from {} input files".format(output_file, len(input_file_list)))
36 
37  # Open output file and write events
38  nEvents = 0
39  with open(output_file, "a") as f_output:
40  logger.info("... reading metadata from {}".format(input_file_list[0]))
41  # Start with the first file and extract opening/closing string
42  s_preamble = preamble(input_file_list[0])
43  s_postamble = postamble(input_file_list[0])
44  f_output.write(s_preamble)
45 
46  # Now write events from all files to output
47  # Use sanitised list in case output_file matches the pattern
48  for nEvents, event in enumerate(event_iterator(input_file_list)):
49  f_output.write(event)
50 
51  # Finally add the footer
52  f_output.write(s_postamble)
53 
54  # Write the event count to the logger
55  logger.info("Wrote {} events to {}".format(nEvents + 1, output_file))
56 
57 
58 def event_iterator(input_files, verbose=True):
59  """! Python generator to iterate through events from input LHE files."""
60  # If a glob-able pattern is provided, expand this into a list
61  if not isinstance(input_files, list):
62  input_files = sorted(glob.glob(input_files))
63 
64  # Iterate over input files
65  for file_name in input_files:
66  if verbose:
67  logger.info("... reading events from {}".format(file_name))
68  in_event, event_lines = False, ""
69  # Group all lines inside an XML event element
70  with _open_file(file_name) as f_input:
71  for line in f_input:
72  # Both <event ...> and <event> are permitted
73  if "<event" in line:
74  in_event = True
75  line = line[line.index("<event"):] # catch cases like "</init><event>"
76  if in_event:
77  event_lines += line
78  if "</event>" in line:
79  yield event_lines
80  in_event, event_lines = False, ""
81 
82 
83 def event_weight_iterator(input_files):
84  """! Python generator to iterate through event weights from input LHE files."""
85  for event in event_iterator(input_files):
86  yield float([w for w in event.splitlines()[1].split(" ") if w][2])
87 
88 
89 def event_counter(input_file_pattern):
90  """! Count total number of events in input files."""
91  for nEvents, _ in enumerate(event_iterator(input_file_pattern, verbose=False)):
92  pass
93  return nEvents + 1
94 
95 
96 def add_weight_to_header(header, weightgroup_name, weight_name, weight_id):
97  """! Add a weight to a header passed as input (can be a string or an ElementTree)."""
98  # Convert string to ElementTree
99  if not isinstance(header, ElementTree.Element):
100  header_elem = ElementTree.fromstring(header)
101  else:
102  header_elem = header
103 
104  # Add initrwgt element if it doesn't exist
105  if header_elem.find("initrwgt") is None:
106  header_elem.append(ElementTree.fromstring("<initrwgt></initrwgt>"))
107 
108  # Add weightgroup element if it doesn't exist
109  if weightgroup_name not in [x.attrib["name"] for x in header_elem.find("initrwgt").findall("weightgroup")]:
110  header_elem.find("initrwgt").append(ElementTree.fromstring("<weightgroup name='{}' combine='None' ></weightgroup>".format(weightgroup_name)))
111 
112  # Add weight to appropriate weightgroup
113  weightgroup = [x for x in header_elem.find("initrwgt").findall("weightgroup") if x.attrib["name"] == weightgroup_name][0]
114  if not any([weight.attrib["id"] == weight_id for weight in weightgroup.findall("weight")]):
115  weightgroup.append(ElementTree.fromstring("<weight id='{}'>{}</weight>".format(weight_id, weight_name)))
116 
117  reindent_XML(header_elem)
118  return header_elem
119 
120 
121 def preamble(input_LHE_file):
122  """! Get opening lines from file as a string."""
123  with _open_file(input_LHE_file) as f_input:
124  s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
125  s_output = s_input[: s_input.find(b"<event>")]
126  return "".join( chr(x) for x in s_output)
127 
128 
129 def postamble(input_LHE_file):
130  """! Get closing lines from file as a string."""
131  with _open_file(input_LHE_file) as f_input:
132  s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
133  s_output = s_input[s_input.rfind(b"</event>") + 9:]
134  return "".join( chr(x) for x in s_output)
135 
136 
137 def opening_tag(input_LHE_file):
138  """! Get <LesHouchesEvents> opening tag from file as a string."""
139  s_opening = preamble(input_LHE_file)
140  if s_opening.find("<LesHouchesEvents") != -1:
141  return s_opening[s_opening.find("<LesHouchesEvents"): s_opening.find(">") + 1].strip("\n")
142  return ""
143 
144 
145 def comment_block(input_LHE_file):
146  """! Get comment block from file as a string."""
147  s_opening = preamble(input_LHE_file)
148  if s_opening.find("<!--") != -1:
149  return s_opening[s_opening.find("<!--"): s_opening.find("-->") + 3].strip("\n")
150  return ""
151 
152 
153 def header_block(input_LHE_file):
154  """! Get <header> block from file as a string."""
155  s_opening = preamble(input_LHE_file)
156  if s_opening.find("<header>") != -1:
157  return s_opening[s_opening.find("<header>"): s_opening.find("</header>") + 9].strip("\n")
158  return "<header>\n</header>"
159 
160 
161 def init_block(input_LHE_file):
162  """! Get <init> block from file as a string."""
163  s_opening = preamble(input_LHE_file)
164  # Both <init ...> and <init> are permitted
165  if s_opening.find("<init>") != -1:
166  return s_opening[s_opening.find("<init>"): s_opening.find("</init>") + 7].strip("\n")
167  if s_opening.find("<init ") != -1:
168  return s_opening[s_opening.find("<init "): s_opening.find("</init>") + 7].strip("\n")
169  return "<init>\n</init>"
170 
171 
172 def get_first_event(input_LHE_file):
173  """! Get first event from file as a string."""
174  with _open_file(input_LHE_file) as f_input:
175  s_input = mmap.mmap(f_input.fileno(), 0, access=mmap.ACCESS_READ)
176  # Both <event ...> and <event> are permitted
177  s_output = s_input[s_input.find("<event"): s_input.find("</event>") + 8]
178  return "".join( chr(x) for x in s_output)
179 
180 
181 def string_to_weight(input_event):
182  """! Get weight name/value pairs from an input string."""
183  comment_lines = input_event[input_event.find("#"):].replace("\n", " ").replace("</event>", "")
184  weight_lines = [" ".join(line.split()) for line in comment_lines.split("#") if "new weight,renfact,facfact,pdf1,pdf2" in line]
185  return [(line.split(" ")[-1], line.split(" ")[2]) for line in weight_lines]
186 
187 
188 def ensure_coloured_quarks(input_event):
189  """! Ensure that all final-state quarks in the event are coloured."""
190  initial_colour_flow, is_event_changed = -1, False
191  event_lines = ""
192  for input_line in input_event.splitlines(True):
193  output_line = None
194  try: # interpret line as a particle
195  IDUP, ISTUP, MOTHUP0, MOTHUP1, ICOLUP0, ICOLUP1, PUP0, PUP1, PUP2, PUPU3, PUP4, VTIMUP, SPINUP = input_line.split()
196  if int(IDUP) == 21 and int(ISTUP) == -1: # this is an initial state gluon
197  initial_colour_flow = max(initial_colour_flow, int(ICOLUP0), int(ICOLUP1))
198  if int(ICOLUP0) == 0 and int(ICOLUP0) == 0: # this is a colourless particle
199  tokens = re.split(r"(\s+)", input_line)
200  if int(IDUP) in range(1, 7): # this is a quark
201  output_line = "".join(tokens[:9]) + " {0:>5d} {1:>5d}".format(initial_colour_flow + 1, 0) + "".join(tokens[13:])
202  is_event_changed = True
203  if int(IDUP) in range(-6, 0): # this is an anti-quark
204  output_line = "".join(tokens[:9]) + " {0:>5d} {1:>5d}".format(0, initial_colour_flow + 1) + "".join(tokens[13:])
205  is_event_changed = True
206  except ValueError: # this is not a particle line
207  pass
208  event_lines += output_line if output_line is not None else input_line
209  return (is_event_changed, event_lines)
210 
211 def mu2tau(input_event):
212  """!
213  Swap out muons for taus, and muon neutrinos for tau neutrinos.
214  Note no momentum reshuffling is done, but Pythia appears to restore the correct tau mass.
215  """
216  is_event_changed = False
217  event_lines = ""
218  for input_line in input_event.splitlines(True):
219  output_line = None
220  try: # interpret line as a particle
221  tokens = re.split(r"(\s+)", input_line)
222  if len(tokens) < 25: raise ValueError
223  IDUP = int(tokens[2])
224  if abs(IDUP) == 13 or abs(IDUP) == 14: # this is a muon or muon neutrino
225  if IDUP > 0:
226  IDUP += 2
227  else:
228  IDUP -= 2
229  is_event_changed = True
230  output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
231  # print "LHE mu2tau swap \n\'"+input_line+"\', for \n\'"+output_line+"\'"
232  except ValueError: # this is not a particle line
233  pass
234  event_lines += output_line if output_line is not None else input_line
235  return (is_event_changed, event_lines)
236 
237 def e2tau(input_event):
238  """!
239  Swap out electrons for taus, and electron neutrinos for tau neutrinos.
240  Note no momentum reshuffling is done, but Pythia appears to restore the correct tau mass.
241  """
242  is_event_changed = False
243  event_lines = ""
244  for input_line in input_event.splitlines(True):
245  output_line = None
246  try: # interpret line as a particle
247  tokens = re.split(r"(\s+)", input_line)
248  if len(tokens) < 25: raise ValueError
249  IDUP = int(tokens[2])
250  if abs(IDUP) == 11 or abs(IDUP) == 12: # this is a electron or electron neutrino
251  if IDUP > 0:
252  IDUP += 4
253  else:
254  IDUP -= 4
255  is_event_changed = True
256  output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
257  except ValueError: # this is not a particle line
258  pass
259  event_lines += output_line if output_line is not None else input_line
260  return (is_event_changed, event_lines)
261 
262 def mu2e(input_event):
263  """!
264  Swap out muons for electrons, and muon neutrinos for electron neutrinos.
265  Note no momentum reshuffling is done.
266  """
267  is_event_changed = False
268  event_lines = ""
269  for input_line in input_event.splitlines(True):
270  output_line = None
271  try: # interpret line as a particle
272  tokens = re.split(r"(\s+)", input_line)
273  if len(tokens) < 25: raise ValueError
274  IDUP = int(tokens[2])
275  if abs(IDUP) == 13 or abs(IDUP) == 14: # this is a muon or muon neutrino
276  if IDUP > 0:
277  IDUP -= 2
278  else:
279  IDUP += 2
280  is_event_changed = True
281  output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
282  except ValueError: # this is not a particle line
283  pass
284  event_lines += output_line if output_line is not None else input_line
285  return (is_event_changed, event_lines)
286 
287 def e2mu(input_event):
288  """!
289  Swap out electrons for muons, and electron neutrinos for muon neutrinos.
290  Note no momentum reshuffling is done.
291  """
292  is_event_changed = False
293  event_lines = ""
294  for input_line in input_event.splitlines(True):
295  output_line = None
296  try: # interpret line as a particle
297  tokens = re.split(r"(\s+)", input_line)
298  if len(tokens) < 25: raise ValueError
299  IDUP = int(tokens[2])
300  if abs(IDUP) == 11 or abs(IDUP) == 12: # this is an electron or electron neutrino
301  if IDUP > 0:
302  IDUP += 2
303  else:
304  IDUP -= 2
305  is_event_changed = True
306  output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
307  except ValueError: # this is not a particle line
308  pass
309  event_lines += output_line if output_line is not None else input_line
310  return (is_event_changed, event_lines)
311 
312 
313 def gg4l_emu2all(input_event):
314  """!
315  Algorithm specific to gg4l Powheg process, to obtain an inclusive
316  sample starting from the only supported decay mode for ZZ production, 2e2mu.
317  """
318  is_event_changed = False
319  event_lines = ""
320 
321  channels_pdgIds = {}
322  channels_pdgIds["2e2mu"] = [11,13]
323  channels_pdgIds["2e2tau"] = [11,15]
324  channels_pdgIds["2mu2tau"] = [13,15]
325  channels_pdgIds["4e"] = [11,11]
326  channels_pdgIds["4mu"] = [13,13]
327  channels_pdgIds["4tau"] = [15,15]
328 
329  channels = ["2e2mu", "2e2tau", "2mu2tau", "4e", "4mu", "4tau"]
330  #probs = [xsec 2l2l', .., .., xsec 4l, .., ..]
331  probs = np.array([2/9, 2/9, 2/9, 1/9, 1/9, 1/9])
332  cumulative=np.cumsum(probs)
333 
334  for input_line in input_event.splitlines(True):
335  output_line = None
336  try: # interpret line as a particle
337  tokens = re.split(r"(\s+)", input_line)
338  if len(tokens) < 25: raise ValueError
339  IDUP = int(tokens[2])
340 
341  if abs(IDUP) == 11: #this is the electron part
342  if not is_event_changed:
343  idx = np.searchsorted(cumulative, np.random.uniform())
344  is_event_changed = True
345  if IDUP > 0:
346  IDUP = channels_pdgIds[channels[idx]][0]
347  else:
348  IDUP = -channels_pdgIds[channels[idx]][0]
349  output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
350 
351  if abs(IDUP) == 13: #this is the muon part
352  if not is_event_changed:
353  idx = np.searchsorted(cumulative, np.random.uniform())
354  is_event_changed = True
355  if IDUP > 0:
356  IDUP = channels_pdgIds[channels[idx]][1]
357  else:
358  IDUP = -channels_pdgIds[channels[idx]][1]
359  output_line = "".join("".join(tokens[:2])+str(IDUP)+"".join(tokens[3:]))
360  except ValueError: # this is not a particle line
361  pass
362  event_lines += output_line if output_line is not None else input_line
363  return (is_event_changed, event_lines)
364 
365 
366 def update_XWGTUP_with_reweighted_nominal(input_event, wgtid_for_old_XWGTUP_value = None):
367  """! Ensure that XWGTUP is equal to the reweighted nominal."""
368  event_lines = ""
369  rwgt_nominal = None
370  XWGTUP = None
371  for input_line in input_event.splitlines(True): # loop first to fine reweighted nominal
372  if input_line.find("<wgt id='0'>") < 0:
373  continue
374  else:
375  try:
376  rwgt_nominal = ElementTree.fromstring(input_line.strip())
377  break # no need to continue the loop
378  except Exception:
379  raise IOError("Impossible to understand line with nominal weight from reweighting")
380  for input_line in input_event.splitlines(True):
381  output_line = None
382  if XWGTUP is None: # XWGTUP not yet found
383  try: # interpret line as a general event info line
384  NUP, IDPRUP, XWGTUP, SCALUP, AQEDUP, AQCDUP = input_line.split()
385  output_line = " %s %s %s %s %s %s\n"%(NUP, IDPRUP, rwgt_nominal.text, SCALUP, AQEDUP, AQCDUP)
386  except ValueError: # this is not a general event info line
387  pass
388  if input_line.find("</rwgt>") >= 0:
389  if (wgtid_for_old_XWGTUP_value is not None):# in this case, add the original value of XWGTUP as last weight
390  output_line = "<wgt id='%i'>%s</wgt>\n"%(wgtid_for_old_XWGTUP_value, XWGTUP)
391  output_line += input_line
392  event_lines += output_line if output_line is not None else input_line
393  return (event_lines)
394 
395 
396 def Powheg2LHEv3(input_event, name_to_ID):
397  """! Get new-style event weights from an input event string."""
398  # Return event as-is if there are no Powheg-style weights
399  if "#new weight,renfact,facfact,pdf1,pdf2" not in input_event:
400  return input_event
401  # Otherwise convert these to LHEv3 weights
402  if "<rwgt>" not in input_event:
403  nominal_weight = [x for x in input_event.split("\n")[1].split(" ") if len(x) > 0][2]
404  input_event = input_event[:input_event.find("#")] + "<rwgt>\n<wgt id='0'> {0} </wgt>\n</rwgt>\n".format(nominal_weight) + input_event[input_event.find("#"):]
405  weight_lines = "".join(["<wgt id='{}'> {} </wgt>\n".format(name_to_ID[weight[0]], weight[1]) for weight in string_to_weight(input_event)])
406  return input_event[:input_event.find("</rwgt>")] + weight_lines + input_event[input_event.find("</rwgt>"):input_event.find("#new weight,renfact,facfact,pdf1,pdf2")] + "</event>\n"
407 
408 
409 def reindent_XML(elem):
410  """! Re-indent XML so that elements are on their own line."""
411  if len(elem):
412  if not elem.text or not elem.text.strip():
413  elem.text = "\n"
414  if not elem.tail or not elem.tail.strip():
415  elem.tail = "\n"
416  for subelem in elem:
417  reindent_XML(subelem)
418  if not elem.tail or not elem.tail.strip():
419  elem.tail = "\n"
420  else:
421  if not elem.tail or not elem.tail.strip():
422  elem.tail = "\n"
python.utility.LHE.preamble
def preamble(input_LHE_file)
Get opening lines from file as a string.
Definition: LHE.py:121
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
python.utility.LHE.event_counter
def event_counter(input_file_pattern)
Count total number of events in input files.
Definition: LHE.py:89
python.utility.LHE.get_first_event
def get_first_event(input_LHE_file)
Get first event from file as a string.
Definition: LHE.py:172
max
#define max(a, b)
Definition: cfImp.cxx:41
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.utility.LHE.update_XWGTUP_with_reweighted_nominal
def update_XWGTUP_with_reweighted_nominal(input_event, wgtid_for_old_XWGTUP_value=None)
Ensure that XWGTUP is equal to the reweighted nominal.
Definition: LHE.py:366
python.utility.LHE.event_weight_iterator
def event_weight_iterator(input_files)
Python generator to iterate through event weights from input LHE files.
Definition: LHE.py:83
python.utility.LHE.mu2e
def mu2e(input_event)
Swap out muons for electrons, and muon neutrinos for electron neutrinos.
Definition: LHE.py:262
python.utility.LHE.event_iterator
def event_iterator(input_files, verbose=True)
Python generator to iterate through events from input LHE files.
Definition: LHE.py:58
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
python.utility.LHE._open_file
def _open_file(filename)
Definition: LHE.py:17
python.utility.LHE.opening_tag
def opening_tag(input_LHE_file)
Get <LesHouchesEvents> opening tag from file as a string.
Definition: LHE.py:137
python.utility.LHE.postamble
def postamble(input_LHE_file)
Get closing lines from file as a string.
Definition: LHE.py:129
python.utility.LHE.string_to_weight
def string_to_weight(input_event)
Get weight name/value pairs from an input string.
Definition: LHE.py:181
python.utility.LHE.header_block
def header_block(input_LHE_file)
Get <header> block from file as a string.
Definition: LHE.py:153
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
python.utility.LHE.add_weight_to_header
def add_weight_to_header(header, weightgroup_name, weight_name, weight_id)
Add a weight to a header passed as input (can be a string or an ElementTree).
Definition: LHE.py:96
DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename T::value_type > sorted(T begin, T end)
Helper function to create a sorted vector from an unsorted one.
python.utility.LHE.gg4l_emu2all
def gg4l_emu2all(input_event)
Algorithm specific to gg4l Powheg process, to obtain an inclusive sample starting from the only suppo...
Definition: LHE.py:313
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.utility.LHE.Powheg2LHEv3
def Powheg2LHEv3(input_event, name_to_ID)
Get new-style event weights from an input event string.
Definition: LHE.py:396
python.utility.LHE.e2mu
def e2mu(input_event)
Swap out electrons for muons, and electron neutrinos for muon neutrinos.
Definition: LHE.py:287
Trk::open
@ open
Definition: BinningType.h:40
python.utility.LHE.ensure_coloured_quarks
def ensure_coloured_quarks(input_event)
Ensure that all final-state quarks in the event are coloured.
Definition: LHE.py:188
python.utility.LHE.mu2tau
def mu2tau(input_event)
Swap out muons for taus, and muon neutrinos for tau neutrinos.
Definition: LHE.py:211
python.utility.LHE.comment_block
def comment_block(input_LHE_file)
Get comment block from file as a string.
Definition: LHE.py:145
python.utility.LHE.merge
def merge(input_file_pattern, output_file)
Merge many input LHE files into a single output file.
Definition: LHE.py:29
str
Definition: BTagTrackIpAccessor.cxx:11
python.utility.LHE.init_block
def init_block(input_LHE_file)
Get <init> block from file as a string.
Definition: LHE.py:161
readCCLHist.float
float
Definition: readCCLHist.py:83
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
python.utility.LHE.e2tau
def e2tau(input_event)
Swap out electrons for taus, and electron neutrinos for tau neutrinos.
Definition: LHE.py:237
python.utility.LHE.reindent_XML
def reindent_XML(elem)
Re-indent XML so that elements are on their own line.
Definition: LHE.py:409