ATLAS Offline Software
Loading...
Searching...
No Matches
EvgenParserTool.evgenParserTool Class Reference
Collaboration diagram for EvgenParserTool.evgenParserTool:

Public Member Functions

 __init__ (self, name='evgenLogParser', **kw)
 processLine (self, line)
 report (self)
 updateMetadata (self, metadata)

Public Attributes

 msg = Logging.logging.getLogger(name)
dict FixHepMCDict = {'reasons':{},'denominator':0,'lines':{}}
dict TestHepMCDict = {'p':0,'f':0,'pfline':None,'rate':{},'notinc':[],'rline':None,'effline':None,'lastpf':0}
dict FilterSeqDict = {'num':0,'den':0,'wnum':0,'wden':0,'line':None,'wline':None}
dict MetadataDict
int isMP = -1
bool isSherpa = False

Detailed Description

Definition at line 12 of file EvgenParserTool.py.

Constructor & Destructor Documentation

◆ __init__()

EvgenParserTool.evgenParserTool.__init__ ( self,
name = 'evgenLogParser',
** kw )

Definition at line 14 of file EvgenParserTool.py.

def __init__(self, name='evgenLogParser', **kw):
    ''' Set up the logger and the empty accumulators filled by processLine

    name is handed to Logging.logging.getLogger to obtain the message logger.
    Extra keyword arguments are accepted but not used in this constructor.
    '''
    self.msg = Logging.logging.getLogger(name)

    # FixHepMC: per-reason removal counts, the common denominator, and the
    # saved pieces of a log line per reason so the line can be re-printed
    self.FixHepMCDict = dict(reasons={}, denominator=0, lines={})

    # TestHepMC:
    #   p / f    - summed numbers of events passing / failing
    #   pfline   - saved prefix of the pass/fail log line
    #   rate     - per-reason counts, rebuilt from the logged percentages
    #   notinc   - reasons flagged as not included in the test efficiency
    #   rline    - saved prefix of an event-rate log line
    #   effline  - saved prefix of the final efficiency log line
    #   lastpf   - pass+fail total of the most recent pass/fail line, used
    #              to convert the percentages that follow back into counts
    self.TestHepMCDict = dict(p=0, f=0, pfline=None, rate={}, notinc=[],
                              rline=None, effline=None, lastpf=0)

    # Filter sequence: unweighted and weighted numerators / denominators
    # plus the saved log-line prefixes
    self.FilterSeqDict = dict(num=0, den=0, wnum=0, wden=0, line=None, wline=None)

    # Metadata: summed weight fields followed by the three scratch fields
    # used for the cross-section calculation; all start at 0.
    metadata_fields = (
        'sumOfPosWeights', 'sumOfNegWeights', 'sumOfSqrWeights',
        'sumOfPosWeightsNoFilter', 'sumOfNegWeightsNoFilter', 'sumOfSqrWeightsNoFilter',
        'xsec_holder', 'xsec_weight', 'xsec_sum',
    )
    self.MetadataDict = dict.fromkeys(metadata_fields, 0.)

    # Incremented once per TestHepMC efficiency line; starts at -1 so that
    # report/updateMetadata only act when more than one such line was seen
    self.isMP = -1
    # Switched on when a generatorName metadata line mentions Sherpa
    self.isSherpa = False
30

Member Function Documentation

◆ processLine()

EvgenParserTool.evgenParserTool.processLine ( self,
line )
Function to process a log line and keep what's needed for final reporting

Definition at line 31 of file EvgenParserTool.py.

def processLine(self, line):
    ''' Function to process a log line and keep what's needed for final reporting

    The line is matched against a few known substrings (FixHepMC removals,
    Py:EvgenFilterSeq efficiencies, TestHepMC summaries, MetaData fields) and
    the numbers found are accumulated in the dictionaries on self.  Lines that
    match none of the patterns are ignored.  Nothing is returned.
    '''

    # Skip PerfMonMTSvc report
    if "PerfMonMTSvc" in line:
        return

    # First check for lines from FixHepMC
    if 'FixHepMC' in line and 'INFO Removed' in line:
        fix = self.FixHepMCDict
        # Use the loops line to count the denominator
        if 'because of loops' in line:
            fix['denominator'] += int(line.split(' of ')[1].split()[0].strip())
        # The text after 'particles' is used as the key for the removal reason
        reason = line.split('particles')[1]
        # Add to the count for that reason
        if reason not in fix['reasons']:
            fix['reasons'][reason] = 0
        fix['reasons'][reason] += int(line.split('Removed')[1].split()[0].strip())
        # Keep the text before 'Removed' and after 'particles' so the line can be re-printed
        if reason not in fix['lines']:
            fix['lines'][reason] = [line.split('Removed')[0], line.split('particles')[1]]

    # Second up: Filter statistics
    elif 'Py:EvgenFilterSeq' in line:
        filt = self.FilterSeqDict
        # First check weighted, then unweighted (one is a substring of the other...)
        if 'Weighted Filter Efficiency' in line:
            # The last two numbers on the line are taken as numerator and denominator
            numbers = re.findall(r'[\d.]+', line)
            filt['wnum'] += float(numbers[-2])
            filt['wden'] += float(numbers[-1])
            # Grab the log line to print
            filt['wline'] = line.split('=')[0]
        elif 'Filter Efficiency' in line:
            numbers = re.findall(r'[\d.]+', line)
            filt['num'] += int(numbers[-2])
            filt['den'] += int(numbers[-1])
            # Grab the log line to print (first occurrence only)
            if filt['line'] is None:
                filt['line'] = line.split('=')[0]

    # Third and finally: TestHepMC
    elif 'TestHepMC' in line and 'Event' in line:
        test = self.TestHepMCDict
        if 'Events passed' in line:
            # The last two numbers on the line are the events passing and failing
            numbers = re.findall(r'[\d.]+', line)
            n_pass = int(numbers[-2])
            n_fail = int(numbers[-1])
            test['p'] += n_pass
            test['f'] += n_fail
            # Save the log line so that we can reproduce it later
            test['pfline'] = line.split('=')[0]
            # Keep track of the denominator to convert the following percentages to counts
            test['lastpf'] = n_pass + n_fail
        elif 'Event rate' in line and '=' in line:
            # Only genuine event-rate lines are parsed here.  Any other TestHepMC
            # line that merely mentions 'Event' used to raise IndexError on the
            # split below; it is now ignored.
            reason = line.split('Event rate')[1].split('=')[0]
            if reason not in test['rate']:
                test['rate'][reason] = 0
            # Some rates are not included in the test efficiency; remember each
            # such reason once, however many times its line is seen
            if 'not included in test efficiency' in line and reason not in test['notinc']:
                test['notinc'].append(reason)
            # Convert the logged percentage back to a count; report() turns it back into a percentage
            my_perc = float(line.split('=')[1].split('%')[0])
            test['rate'][reason] += my_perc * test['lastpf'] / 100.
            # And last we just have to get the log format for an event rate line right
            test['rline'] = line.split('Event rate')[0]

    # Extra catch for Efficiency line for TestHepMC
    elif 'TestHepMC' in line and 'Efficiency' in line:
        # If this is just the efficiency line, all we need is the log line format
        self.TestHepMCDict['effline'] = line.split('=')[0]
        # Because this is a unique line, we will also use it to check if we are running MP
        self.isMP += 1

    elif 'MetaData:' in line:
        meta = self.MetadataDict
        # If it's a metadata line, we just sum them up, with one exception
        field = line.split('MetaData:')[1].split('=')[0].strip()
        if field in meta:
            meta[field] += float(line.split('=')[1])
        # Check if we're dealing with Sherpa, in which case our cross section calculation has to change
        if field == 'generatorName' and 'Sherpa' in line:
            self.isSherpa = True
        # Cross section requires special attention
        # The cross section field itself comes first, so we have to just stash it
        if field == 'cross-section (nb)':
            meta['xsec_holder'] = float(line.split('=')[1])
        # The weights fields are also there; we need the 'no filter' versions
        # Use = here to reset each round
        elif field == 'sumOfPosWeightsNoFilter':
            meta['xsec_weight'] = float(line.split('=')[1])
        # Sum of negative weights is last, and now we have all the info we need
        elif field == 'sumOfNegWeightsNoFilter':
            my_negw = float(line.split('=')[1])
            net_weight = meta['xsec_weight'] - my_negw
            # Get the more complicated item for the cross-section calculation later
            if self.isSherpa:
                if meta['xsec_holder'] != 0:
                    # Use the cross section in pb in this calculation
                    meta['xsec_sum'] += net_weight / (meta['xsec_holder'] * 1000.)
            else:
                meta['xsec_sum'] += meta['xsec_holder'] * net_weight
            # We don't need to keep the sum of weights here, because we have it elsewhere
127
128
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177

◆ report()

EvgenParserTool.evgenParserTool.report ( self)
Function to print final statistics grabbed from the logs

Definition at line 129 of file EvgenParserTool.py.

def report(self):
    ''' Function to print final statistics grabbed from the logs

    Does nothing (beyond a debug message) unless self.isMP is at least 1.
    Otherwise the accumulated FixHepMC, TestHepMC and filter-sequence numbers
    are printed to stdout, re-using the log-line prefixes saved by processLine.
    A summary line whose prefix was never seen in the logs is skipped rather
    than being printed with a literal 'None' prefix.
    '''
    # If we aren't running MP, then forget it
    if self.isMP < 1:
        self.msg.debug('Not running MP, nothing to do')
        return
    # Because in an MP job we get a primary worker report as well, the number of workers is just self.isMP
    self.msg.info(f'Printing final summary statistics from {self.isMP} MP workers')

    # First print all the FixHepMC stuff - just rates for each reason we remove particles
    fix = self.FixHepMCDict
    for reason, removed in fix['reasons'].items():
        prefix, suffix = fix['lines'][reason]
        print(f"{prefix}Removed {removed} of {fix['denominator']} particles {suffix}")

    # Next print all the information from TestHepMC, starting with the pass/fail summary
    test = self.TestHepMCDict
    if test['pfline'] is not None:
        print(f"{test['pfline']}= {test['p']}, Events Failed = {test['f']}")
    # The same denominator is used for all the ratios printed next
    denom = test['p'] + test['f']
    # Now go through all the TestHepMC checks, and for each one recreate the log line
    for rate, count in test['rate'].items():
        # A zero denominator gives a 0% rate
        pct = (count / denom) * 100. if denom > 0 else 0.
        note = ' (not included in test efficiency)' if rate in test['notinc'] else ''
        print(f"{test['rline']}Event rate {rate} = {pct}%{note}")
    # Now print the final efficiency; with no events at all it is reported as 100%
    final_eff = test['p'] / denom if denom > 0 else 1.
    if test['effline'] is not None:
        print(f"{test['effline']}= {final_eff*100.}%")

    # Now print our filter sequence summary - a zero denominator gives an efficiency of 1
    filt = self.FilterSeqDict
    if filt['line'] is not None:
        eff = (filt['num'] / filt['den']) if filt['den'] > 0 else 1.
        print(f"{filt['line']} = {eff} [{filt['num']} / {filt['den']}]")
    if filt['wline'] is not None:
        eff = (filt['wnum'] / filt['wden']) if filt['wden'] > 0 else 1.
        print(f"{filt['wline']} = {eff} [{filt['wnum']} / {filt['wden']}]")
163
const bool debug
void print(char *figname, TCanvas *c1)

◆ updateMetadata()

EvgenParserTool.evgenParserTool.updateMetadata ( self,
metadata )
Update the metadata based on the logfile information

Definition at line 164 of file EvgenParserTool.py.

def updateMetadata(self, metadata):
    ''' Update the metadata based on the logfile information

    metadata is a dictionary that is modified in place and also returned.
    When self.isMP is below 1 it is returned untouched.  Otherwise the
    'cross-section (nb)' entry is always written, the summed fields are
    written only for keys already present in metadata, and 'GenFiltEff' is
    written only if metadata already has that key.  All values are stored
    as strings in exponent notation.
    '''
    # If we aren't running MP, then forget it
    if self.isMP < 1:
        self.msg.debug('Not running MP, nothing to do')
        return metadata

    totals = self.MetadataDict
    # Net sum of weights before filtering; used by both calculations below
    net_nofilter = totals['sumOfPosWeightsNoFilter'] - totals['sumOfNegWeightsNoFilter']
    xsec_sum = totals['xsec_sum']

    # First by convention is the cross-section, which we have to calculate.
    # It stays at zero when the relevant denominator is not positive.
    if self.isSherpa:
        # Special calculation for Sherpa; see also AGENE-2385
        # The trailing division converts back to nb
        my_xsec = net_nofilter / xsec_sum / 1000. if xsec_sum > 0 else 0.
    else:
        my_xsec = xsec_sum / net_nofilter if net_nofilter > 0. else 0.
    self.msg.info(f'cross-section (nb)= {my_xsec:e}')
    metadata['cross-section (nb)'] = f'{my_xsec:e}'

    # Now come all the fields that we had saved
    self.msg.info('Updated metadata:')
    for field, value in totals.items():
        # Only fields already present in the metadata are updated, which
        # skips the cross-section helper fields
        if field not in metadata:
            continue
        self.msg.info(f'{field} = {value:e}')
        metadata[field] = f'{value:e}'

    # Generator filter efficiency: filtered over unfiltered net weights, 1 if undefined
    geneff = 1.
    if net_nofilter > 0:
        geneff = (totals['sumOfPosWeights'] - totals['sumOfNegWeights']) / net_nofilter
    self.msg.info(f'GenFiltEff = {geneff:e}')
    if 'GenFiltEff' in metadata:
        metadata['GenFiltEff'] = f'{geneff:e}'
    return metadata
200

Member Data Documentation

◆ FilterSeqDict

dict EvgenParserTool.evgenParserTool.FilterSeqDict = {'num':0,'den':0,'wnum':0,'wden':0,'line':None,'wline':None}

Definition at line 23 of file EvgenParserTool.py.

◆ FixHepMCDict

dict EvgenParserTool.evgenParserTool.FixHepMCDict = {'reasons':{},'denominator':0,'lines':{}}

Definition at line 17 of file EvgenParserTool.py.

◆ isMP

int EvgenParserTool.evgenParserTool.isMP = -1

Definition at line 28 of file EvgenParserTool.py.

◆ isSherpa

bool EvgenParserTool.evgenParserTool.isSherpa = False

Definition at line 29 of file EvgenParserTool.py.

◆ MetadataDict

dict EvgenParserTool.evgenParserTool.MetadataDict
Initial value:
= {'sumOfPosWeights':0.,'sumOfNegWeights':0.,'sumOfSqrWeights':0.,
'sumOfPosWeightsNoFilter':0.,'sumOfNegWeightsNoFilter':0.,'sumOfSqrWeightsNoFilter':0.,
'xsec_holder':0.,'xsec_weight':0.,'xsec_sum':0.}

Definition at line 25 of file EvgenParserTool.py.

◆ msg

EvgenParserTool.evgenParserTool.msg = Logging.logging.getLogger(name)

Definition at line 15 of file EvgenParserTool.py.

◆ TestHepMCDict

dict EvgenParserTool.evgenParserTool.TestHepMCDict = {'p':0,'f':0,'pfline':None,'rate':{},'notinc':[],'rline':None,'effline':None,'lastpf':0}

Definition at line 21 of file EvgenParserTool.py.


The documentation for this class was generated from the following file: