ATLAS Offline Software
Loading...
Searching...
No Matches
lhe_splitter.py
Go to the documentation of this file.
1#! /usr/bin/env python3
2
3# Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
4#
5
6
7import sys
8import argparse
9import math
10import os
11
def _is_event_start(line):
    """Return True when *line* contains an opening ``<event>`` tag."""
    return '<event>' in line or '<event ' in line


def _split_output_name(input_path, directory=None):
    """Return the ``(stem, extension)`` pair used to build output file names.

    The extension is everything from the last ``.`` of the input file's base
    name (dot included), or an empty string when the base name has no dot.
    When *directory* is given, it is prepended to the stem.
    """
    base_name = os.path.basename(input_path)
    last_dot = base_name.rfind('.')
    if last_dot < 0:
        # No extension: keep the whole name. Slicing with -1 here would
        # silently drop the last character of the stem.
        stem, ext = base_name, ''
    else:
        stem, ext = base_name[:last_dot], base_name[last_dot:]
    if directory is not None:
        stem = os.path.join(directory, stem)
    return stem, ext


def main(argv=None):
    """Split an LHE file into several files holding a fixed number of events.

    Every line before the first event and every line after the last event is
    copied into all output files; each event is written to exactly one of
    them. Output files are named ``<stem>_<index><ext>`` after the input file.

    Args:
        argv: Command-line arguments, excluding the program name. When None,
            ``sys.argv[1:]`` is parsed.

    Returns:
        0 on success, or when the input holds fewer events than requested per
        file (nothing is written in that case); 1 when the input file cannot
        be opened or the requested number of events per file is not positive.

    Raises:
        SystemExit: with code 1 when one of the output files already exists.
    """
    # Grab and parse the arguments for the script
    parser = argparse.ArgumentParser(description="Split an LHE file into chunks")
    parser.add_argument('inputFile', help="Input file")
    parser.add_argument('--events', '-e', type=int, required=True,
                        help="Number of events per output file")
    parser.add_argument('--directory', '-d', help='Output directory (optional; default ".")')
    args = parser.parse_args(argv)

    events_per_file = args.events
    if events_per_file < 1:
        # Zero would divide by zero below; negative values make no sense.
        print('Error: the number of events per output file must be a positive integer')
        return 1

    # Do a little name parsing to figure out the name of the output file(s)
    out_stem, out_ext = _split_output_name(args.inputFile, args.directory)

    # Pop open the input file
    try:
        in_file = open(args.inputFile, 'r')
    except OSError:
        print(f'Error: cannot access input file {args.inputFile}')
        return 1

    with in_file:
        # Count the number of events in the file
        n_events = sum(1 for line in in_file if _is_event_start(line))

        # Do a little error checking
        print(f'Attempting to split file of {n_events} events into files of {events_per_file} events each')
        if n_events < events_per_file:
            print(f'Fewer events than requested {events_per_file} in input file. No action needed.')
            return 0
        leftover = n_events % events_per_file
        if leftover:
            print(f'Warning: the final file will only have {leftover} events')

        # Integer ceiling division: number of output files needed
        n_files = -(-n_events // events_per_file)
        out_names = [f'{out_stem}_{i}{out_ext}' for i in range(n_files)]

        # Check every name before creating anything, so that a clash does not
        # leave a trail of freshly created empty files behind.
        for name in out_names:
            if os.path.exists(name):
                print(f'Error: output file {name} exists. Please cleanup and try again')
                sys.exit(1)

        # Reset to the beginning of the file
        in_file.seek(0)

        out_files = []
        try:
            for name in out_names:
                out_files.append(open(name, 'w'))

            # Index of the event currently being read: -1 while in the header,
            # n_events once the last event has been closed (i.e. the footer).
            this_event = -1
            for line in in_file:
                starts_event = _is_event_start(line)

                # Header and footer lines go into ALL the files
                if (this_event == -1 and not starts_event) or this_event == n_events:
                    for out_file in out_files:
                        out_file.write(line)
                    continue

                if starts_event:
                    this_event += 1

                out_files[this_event // events_per_file].write(line)

                # The last event just closed: everything that follows is footer
                if '</event>' in line and this_event == n_events - 1:
                    this_event += 1
        finally:
            # Close up, even if reading or writing failed part-way through
            for out_file in out_files:
                out_file.close()

    # All done
    return 0
93
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
void print(char *figname, TCanvas *c1)