ATLAS Offline Software
lhe_splitter.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
3 # Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
4 #
5 
6 
7 import sys
8 import argparse
9 import math
10 import os
11 
def _opens_event(line):
    """Return True if *line* contains an LHE event opening tag."""
    return '<event>' in line or '<event ' in line


def main():
    """Split an LHE file into chunks holding a fixed number of events.

    Command-line arguments (read from ``sys.argv``):
      inputFile         path of the LHE file to split
      --events / -e     number of events per output file (required, > 0)
      --directory / -d  directory for the output files (optional)

    Output files are named ``<stem>_<i><ext>``, where ``<stem>`` and
    ``<ext>`` come from the input file name split at its last dot.  Every
    line before the first event and after the last event is copied into
    each output file; events are distributed in order.

    Returns:
        0 on success, or when the input holds fewer events than requested
        (nothing is written in that case); 1 if the arguments are invalid,
        the input cannot be read, or an output file already exists.
    """
    import contextlib

    # Grab and parse the arguments for the script
    parser = argparse.ArgumentParser(description="Split an LHE file into chunks")
    parser.add_argument('inputFile', help="Input file")
    parser.add_argument('--events', '-e', type=int, required=True,
                        help="Number of events per output file")
    parser.add_argument('--directory', '-d', help='Output directory (optional; default ".")')
    args = vars(parser.parse_args(sys.argv[1:]))

    input_path = args['inputFile']
    events_per_file = args['events']

    # A chunk size of zero or less would divide by zero / index out of range below
    if events_per_file <= 0:
        print(f'Error: number of events per file must be positive, got {events_per_file}')
        return 1

    if not os.access(input_path, os.R_OK):
        print(f'Error: cannot access input file {input_path}')
        return 1

    # Do a little name parsing to figure out the name of the output file(s).
    # Without a dot in the name the whole base name is the stem.
    base_name = os.path.basename(input_path)
    last_dot = base_name.rfind('.')
    if last_dot < 0:
        out_stem, out_ext = base_name, ''
    else:
        out_stem, out_ext = base_name[:last_dot], base_name[last_dot:]
    if args['directory'] is not None:
        out_stem = os.path.join(args['directory'], out_stem)

    # The ExitStack closes the input and every output file on all exit paths
    with contextlib.ExitStack() as stack:
        in_file = stack.enter_context(open(input_path, 'r'))

        # Count the number of events in the file
        n_events = sum(1 for line in in_file if _opens_event(line))

        # Do a little error checking
        print(f'Attempting to split file of {n_events} events into files of {events_per_file} events each')
        if n_events < events_per_file:
            print(f'Fewer events than requested {events_per_file} in input file. No action needed.')
            return 0
        leftover = n_events % events_per_file
        if leftover:
            print(f'Warning: the final file will only have {leftover} events')

        # Integer ceiling division; exact for any file size
        n_files = -(-n_events // events_per_file)
        out_names = [f'{out_stem}_{i}{out_ext}' for i in range(n_files)]

        # Refuse to overwrite anything, and check before creating any file
        # so that a failure leaves no partial output behind
        for out_name in out_names:
            if os.path.exists(out_name):
                print(f'Error: output file {out_name} exists. Please cleanup and try again')
                return 1

        # Reset to the beginning of the file
        in_file.seek(0, 0)

        # Pop open all our output files
        out_files = [stack.enter_context(open(out_name, 'w')) for out_name in out_names]

        # Read back through the file, and let's parse!
        # this_event is the index of the event being read: -1 while still in
        # the header, n_events once the last event has been closed.
        this_event = -1
        for line in in_file:
            starts_event = _opens_event(line)

            # Header and footer lines go into ALL the files
            if (this_event == -1 and not starts_event) or this_event == n_events:
                for out_file in out_files:
                    out_file.write(line)
                continue

            if starts_event:
                this_event += 1

            # this_event is in [0, n_events - 1] here, so the index is valid
            out_files[this_event // events_per_file].write(line)

            # Special condition: we've reached the end, switch to footer mode
            if '</event>' in line and this_event == n_events - 1:
                this_event += 1

    # All done
    return 0
93 
# Script entry point: run the splitter and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
python.ByteStreamConfig.write
def write
Definition: Event/ByteStreamCnvSvc/python/ByteStreamConfig.py:248
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
Trk::open
@ open
Definition: BinningType.h:40
Muon::print
std::string print(const MuPatSegment &)
Definition: MuonTrackSteering.cxx:28
lhe_splitter.main
def main()
Definition: lhe_splitter.py:12