ATLAS Offline Software
Loading...
Searching...
No Matches
OutputAnalysisConfig.py
Go to the documentation of this file.
1# Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
2
3# AnaAlgorithm import(s):
4from AnalysisAlgorithmsConfig.ConfigBlock import ConfigBlock
5from AnalysisAlgorithmsConfig.ConfigAccumulator import DataType
6from AnalysisAlgorithmsConfig.ConfigBlock import filter_dsids
7from AthenaCommon.Logging import logging
8import copy, re
9
10class OutputAnalysisConfig (ConfigBlock):
11 """the ConfigBlock for the output ntuple configuration"""
12
def __init__ (self) :
    """Declare all user-configurable options of the output block.

    Every option is registered through ``addOption`` with its default
    value, its expected type and a help string. The ``validated`` flag
    set at the end records whether the one-off option transformations
    in ``makeAlgs`` have already been performed.
    """
    super (OutputAnalysisConfig, self).__init__ ()

    # --- generic naming -------------------------------------------------
    self.addOption ('postfix', '', type=str,
        info="a postfix to apply to decorations and algorithm names. "
        "Typically not needed here.")

    # --- explicit branch rules ------------------------------------------
    self.addOption ('vars', [], type=list,
        info="a list of mappings (list of strings) between containers and "
        "decorations to output branches.")
    self.addOption ('varsOnlyForMC', [], type=list,
        info="same as `vars`, but for MC-only variables so as to avoid a "
        "crash when running on data.")
    self.addOption ('metVars', [], type=list,
        info="a list of mappings (list of strings) between containers "
        "and decorations to output branches. Specficially for MET "
        "variables, where only the final MET term is retained.")
    self.addOption ('truthMetVars', [], type=list,
        info="a list of mappings (list of strings) between containers "
        "and decorations to output branches for truth MET.")

    # --- prefix -> container maps ---------------------------------------
    self.addOption ('containers', {}, type=dict,
        info="a dictionary mapping prefixes (key) to container names "
        "(values) to be used when saving to the output tree. Branches "
        "are then of the form `prefix_decoration`.")
    self.addOption ('containersFullMET', {}, type=dict,
        info="same as `containers`, but for MET containers that should be "
        "saved with all terms (as opposed to just the final term). This "
        "is useful for special studies. A container can appear both here and "
        "in containers (with different prefixes).")
    self.addOption ('containersOnlyForMC', {}, type=dict,
        info="same as `containers`, but for MC-only containers so as to avoid "
        "a crash when running on data.")
    self.addOption ('containersOnlyForDSIDs', {}, type=dict,
        info="specify which DSIDs are allowed to produce a given container. "
        "This works like `onlyForDSIDs`: pass a list of DSIDs or regexps.")
    self.addOption ('nonContainers', ['EventInfo'], type=list,
        info="a list of container names that are not actual containers but should be treated as non-containers.")

    # --- output tree / stream -------------------------------------------
    self.addOption ('treeName', 'analysis', type=str,
        info="name of the output TTree to save.")
    self.addOption ('streamName', 'ANALYSIS', type=str,
        info="name of the output stream to save the tree in.")

    # --- MET term selection ---------------------------------------------
    self.addOption ('metTermName', 'Final', type=str,
        info="the name of the MET term to save, turning the MET "
        "container into a single object.")
    self.addOption ('truthMetTermName', 'NonInt', type=str,
        info="the name of the truth MET term to save, turning the MET "
        "container into a single object.")

    # --- selection flags ------------------------------------------------
    self.addOption ('storeSelectionFlags', True, type=bool,
        info="whether to store one branch for each object selection.")
    self.addOption ('selectionFlagPrefix', 'select', type=str,
        info="the prefix used when naming selection branches.")

    # --- branch enable/disable/rename commands --------------------------
    self.addOption ('commands', [], type=list,
        info="a list of strings containing commands (regexp strings "
        "prefaced by the keywords `enable` or `disable`) to turn on/off the "
        "writing of branches to the output ntuple. If left empty, do not modify "
        "the scheduled output branches.")
    self.addOption ('commandsOnlyForDSIDs', {}, type=dict,
        info="a dictionary with individual DSIDs as keys, and a list of strings "
        "like for the `commands` option as items. These `commands` will only be run "
        "for the corresponding DSID.")

    # --- miscellaneous --------------------------------------------------
    self.addOption ('alwaysAddNosys', False, type=bool,
        info="If set to `True`, all branches will be given a systematics suffix, "
        "even if they have no systematics (beyond the nominal).")
    self.addOption ('skipRedundantSelectionFlags', True, type=bool,
        info="remove the redundant `outputSelect` branches created by the `Thinning` step. "
        "These could however be used to simplify downstream workflows, as in Easyjet.")
    self.addOption ('defaultBasketSize', None, type=int,
        info="default basket size for all branches in the output tree. "
        "If not set (the default), no basket size is configured and ROOT's "
        "default will be used.")

    # helper to protect for second pass
    self.validated = False
83
def instanceName (self) :
    """Return the instance name for this block.

    The ``postfix`` option is used when it is set to a non-empty value;
    otherwise the name of the output tree (``treeName``) is returned.
    """
    if self.postfix is not None and self.postfix != '':
        return self.postfix
    return self.treeName
89
@staticmethod
def branchSortOrder (rule):
    """Return the sort key of a branch rule.

    A rule has the form ``"<source> -> <branch>[ options]"``; the key is
    the text between the first and the second ``->`` (normally everything
    after the single arrow), stripped of surrounding whitespace.

    Raises:
        IndexError: if ``rule`` contains no ``->``.
    """
    return rule.split('->')[1].strip()
93
def createOutputAlgs (self, config, name, vars):
    """Create and configure one ``CP::AsgxAODNTupleMakerAlg``.

    Args:
        config: object providing ``createAlgorithm(type, name)``.
        name: name given to the new algorithm.
        vars: iterable of branch rules (strings containing ``->``).

    Returns:
        The algorithm, with tree name, stream name, non-container list and
        branch list set. ``DefaultBasketSize`` is only set when the
        ``defaultBasketSize`` option is not ``None``.
    """
    alg = config.createAlgorithm('CP::AsgxAODNTupleMakerAlg', name)
    alg.TreeName = self.treeName
    alg.RootStreamName = self.streamName
    alg.NonContainers = list(self.nonContainers)
    # Branches without a "%SYS%" placeholder come first, the ones with it
    # afterwards; inside each group the order follows branchSortOrder.
    # False sorts before True, so one stable sort on a (flag, key) pair
    # gives exactly that layout.
    alg.Branches = sorted(
        vars,
        key=lambda branch: ("%SYS%" in branch, self.branchSortOrder(branch)),
    )
    if self.defaultBasketSize is not None:
        alg.DefaultBasketSize = self.defaultBasketSize
    return alg
108
def makeAlgs (self, config) :
    """Schedule the algorithms that write the output ntuple.

    Steps, in order:

    1. once per block (guarded by ``self.validated``): normalise the
       options, merge ``containersFullMET`` and (for non-data input)
       the MC-only branches/containers into the main collections, and
       drop containers that are not allowed for the current DSID;
    2. optionally schedule the per-selection summary flags;
    3. collect the output variables registered for every container and
       apply the ``enable`` / ``disable`` / ``rename`` commands;
    4. turn the enabled variables into branch rules;
    5. create the tree maker, the ntuple maker(s) and the tree filler.

    Args:
        config: the configuration accumulator the algorithms are added to.

    Raises:
        KeyError: if ``containersFullMET`` or ``containersOnlyForMC`` reuse
            a prefix of ``containers``, for an unknown command keyword, or
            (not on data, and unless the command is marked ``optional``)
            for a command pattern that matches no branch.
        ValueError: for an empty command or a wrong number of arguments.
    """
    log = logging.getLogger('OutputAnalysisConfig')

    # do some transformations of the options we should only do once
    if not self.validated:

        self.containers = dict(self.containers)
        # All branch lists become sets: they are merged with set operators
        # below, and this also collapses duplicated rules.
        self.vars = set(self.vars)
        self.varsOnlyForMC = set(self.varsOnlyForMC)
        self.metVars = set(self.metVars)
        self.truthMetVars = set(self.truthMetVars)

        # check for overlaps between containers and containersFullMET
        overlapping_keys = set(self.containers.keys()).intersection(self.containersFullMET.keys())
        if overlapping_keys:
            # convert the set of overlapping keys to a list of strings for the message (represents the empty string too!)
            keys_message = [repr(key) for key in overlapping_keys]
            raise KeyError(f"containersFullMET would overwrite the following container keys: {', '.join(keys_message)}")
        # move items in self.containersFullMET to containers
        # (containersFullMET itself is kept: it is consulted further down
        # to tell full-MET prefixes apart from single-term ones)
        self.containers.update(self.containersFullMET)

        # merge the MC-specific branches and containers into the main list/dictionary only if we are not running on data
        if config.dataType() is not DataType.Data:
            self.vars |= self.varsOnlyForMC

            # protect 'containers' against being overwritten
            # find overlapping keys
            overlapping_keys = set(self.containers.keys()).intersection(self.containersOnlyForMC.keys())
            if overlapping_keys:
                # convert the set of overlapping keys to a list of strings for the message (represents the empty string too!)
                keys_message = [repr(key) for key in overlapping_keys]
                raise KeyError(f"containersOnlyForMC would overwrite the following container keys: {', '.join(keys_message)}")

            # move items in self.containersOnlyForMC to self.containers
            self.containers.update(self.containersOnlyForMC)
            # clear the dictionary to avoid overlapping key error during the second pass
            self.containersOnlyForMC.clear()

        # now filter the containers depending on DSIDs
        for container, dsid_filters in self.containersOnlyForDSIDs.items():
            if container not in self.containers:
                log.warning("Skipping unrecognised container prefix '%s' for DSID-filtering in OutputAnalysisConfig...", container)
                continue
            if filter_dsids (dsid_filters, config):
                continue
            # if current DSID is not allowed for this container, remove it
            log.info("Skipping container prefix '%s' due to DSID filtering...", container)
            excludedContainer = self.containers[container]
            # drop every user-defined rule reading from the excluded
            # container, in each of the three rule collections
            for rules, message in (
                    (self.vars, "Skipping branch definition '%s' for excluded container %s..."),
                    (self.metVars, "Skipping MET branch definition '%s' for excluded container %s..."),
                    (self.truthMetVars, "Skipping truth MET branch definition '%s' for excluded container %s..."),
            ):
                for var in set(rules): # iterate over a copy, we remove while looping
                    var_container = var.split('.')[0].replace('_NOSYS', '').replace('_%SYS%', '')
                    if var_container == excludedContainer:
                        rules.remove(var)
                        log.info(message, var, var_container)
            # remove the container from the list at the end
            self.containers.pop (container)
        # clear the dictionary to avoid warnings during the second pass
        self.containersOnlyForDSIDs.clear()

        # at this point we are OK
        self.validated = True

    # The summary flags register additional output variables, so this has
    # to run before the output variables are collected below.
    if self.storeSelectionFlags:
        self.createSelectionFlagBranches(config)

    outputConfigs = {}
    for prefix, containerName in self.containers.items() :
        outputDict = config.getOutputVars (containerName)
        for outputName in outputDict :
            outputConfig = copy.deepcopy (outputDict[outputName])
            if containerName != outputConfig.origContainerName or config.checkOutputContainer(containerName):
                outputConfig.outputContainerName = containerName + '_%SYS%'
            else:
                outputConfig.outputContainerName = config.readName(containerName)
            outputConfig.prefix = prefix
            # if the container is a MET container with all terms, we
            # also need to write out the name of each MET term
            if prefix in self.containersFullMET and outputConfig.variableName == 'name':
                outputConfig.enabled = True
            outputConfigs[prefix + outputName] = outputConfig

    # check for DSID-specific commands; work on a copy so that the option
    # itself does not grow every time this method runs
    commands = list(self.commands)
    for dsid, dsid_commands in self.commandsOnlyForDSIDs.items():
        if filter_dsids([dsid], config):
            commands += dsid_commands

    outputConfigsRename = {}
    for command in commands :
        # split on any run of whitespace, so that an empty or blank
        # command really yields an empty word list
        words = command.split ()
        if not words :
            raise ValueError ('received empty command for "commands" option')
        optional = words[0] == 'optional'
        if optional :
            words = words[1:] # remove the 'optional' keyword
            if not words :
                raise ValueError ('missing command after "optional": ' + command)
        action = words[0]
        if action not in ('enable', 'disable', 'rename') :
            raise KeyError ('unknown command for "commands" option: ' + action)
        if action == 'rename' :
            if len (words) != 3 :
                raise ValueError ('rename takes exactly two arguments: ' + command)
        elif len (words) != 2 :
            raise ValueError (action + ' takes exactly one argument: ' + command)

        pattern = words[1]
        used = False
        for name in outputConfigs :
            if not re.match (pattern, name) :
                continue
            used = True
            if action == 'rename' :
                # the renamed copy keeps the current enabled state; the
                # branch under the old name is switched off
                new_name = re.sub (pattern, words[2], name)
                outputConfigsRename[new_name] = copy.deepcopy(outputConfigs[name])
                outputConfigs[name].enabled = False
            else :
                outputConfigs[name].enabled = (action == 'enable')
        if not used and not optional and config.dataType() is not DataType.Data:
            raise KeyError ('unknown branch pattern for ' + action + ': ' + pattern)

    # update the outputConfigs with renamed branches
    outputConfigs.update(outputConfigsRename)

    autoVars = set()
    autoMetVars = set()
    autoTruthMetVars = set()
    for outputName, outputConfig in outputConfigs.items():
        if not outputConfig.enabled :
            continue
        # MET containers are reduced to a single term unless their prefix
        # was requested with all terms through containersFullMET
        singleTermMet = (config.isMetContainer (outputConfig.origContainerName)
                         and outputConfig.prefix not in self.containersFullMET)
        truthMet = singleTermMet and "Truth" in outputConfig.origContainerName
        if not singleTermMet :
            myVars = autoVars
        elif truthMet :
            myVars = autoTruthMetVars
        else :
            myVars = autoMetVars

        if outputConfig.noSys :
            outputConfig.outputContainerName = outputConfig.outputContainerName.replace ('%SYS%', 'NOSYS')
            outputConfig.variableName = outputConfig.variableName.replace ('%SYS%', 'NOSYS')
            # only tag systematics-free branches when explicitly requested
            if self.alwaysAddNosys :
                outputName += "_NOSYS"
        else :
            outputName += '_%SYS%'

        branchDecl = f"{outputConfig.outputContainerName}.{outputConfig.variableName} -> {outputName}"
        if outputConfig.auxType is not None :
            branchDecl += f" type={outputConfig.auxType}"
        if truthMet :
            branchDecl += f" metTerm={self.truthMetTermName}"
        elif singleTermMet :
            branchDecl += f" metTerm={self.metTermName}"
        myVars.add(branchDecl)

    # Add an ntuple dumper algorithm:
    treeMaker = config.createAlgorithm( 'CP::TreeMakerAlg', 'TreeMaker' )
    treeMaker.TreeName = self.treeName
    treeMaker.RootStreamName = self.streamName
    # the auto-flush setting still needs to be figured out
    #treeMaker.TreeAutoFlush = 0

    if self.vars or autoVars:
        self.createOutputAlgs(config, 'NTupleMaker', self.vars | autoVars)

    if self.metVars or autoMetVars:
        # user-defined MET rules get the term selection appended here
        userMetVars = {var + " metTerm=" + self.metTermName for var in self.metVars}
        self.createOutputAlgs(config, 'MetNTupleMaker', userMetVars | autoMetVars)

    if config.dataType() is not DataType.Data and (self.truthMetVars or autoTruthMetVars):
        userTruthMetVars = {var + " metTerm=" + self.truthMetTermName for var in self.truthMetVars}
        self.createOutputAlgs(config, 'TruthMetNTupleMaker', userTruthMetVars | autoTruthMetVars)

    treeFiller = config.createAlgorithm( 'CP::TreeFillerAlg', 'TreeFiller' )
    treeFiller.TreeName = self.treeName
    treeFiller.RootStreamName = self.streamName
310
311
312
314 """
315 For each container and for each selection, create a single pass variable in output NTuple,
316 which aggregates all the selections flag of the given selection. For example, this can include
317 pT, eta selections, some object ID selection, overlap removal, etc.
318 The goal is to have only one flag per object and working point in the output NTuple.
319 """
320 originalContainersSeen = []
321 for prefix in self.containers.keys() :
322 outputContainerName = self.containers[prefix]
323 containerName = config.getOutputContainerOrigin(outputContainerName)
324 if containerName in originalContainersSeen:
325 continue
326 else:
327 originalContainersSeen.append(containerName)
328
329 # EventInfo is one obvious example of a container that has no object selections
330 if containerName == 'EventInfo':
331 continue
332
333 selectionNames = config.getSelectionNames(containerName)
334 for selectionName in selectionNames:
335 # skip default selection
336 if selectionName == '':
337 continue
338 # skip selection coming from the Thinning block
339 if self.skipRedundantSelectionFlags and "outputSelect" in selectionName:
340 continue
341 self.makeSelectionSummaryAlg(config, containerName, selectionName)
342
def makeSelectionSummaryAlg (self, config, containerName, selectionName):
    """
    Schedule an algorithm to pick up all cut flags for a given selectionName.
    The summary selection flag is written to output as selectionFlagPrefix_selectionName.

    Args:
        config: object providing ``createAlgorithm``, ``readName``,
            ``getFullSelection`` and ``addOutputVar``.
        containerName: name of the container the selection applies to.
        selectionName: name of the selection to summarise.
    """
    # decoration holding the summary flag; it is systematics dependent
    selectionDecoration = f'baselineSelection_{selectionName}_%SYS%'

    alg = config.createAlgorithm( 'CP::AsgSelectionAlg',
                                  f'ObjectSelectionSummary_{containerName}_{selectionName}')
    alg.selectionDecoration = f'{selectionDecoration},as_char'
    alg.particles = config.readName (containerName)
    alg.preselection = config.getFullSelection (containerName, selectionName)

    # register the decoration so that it ends up as an output branch
    outputBranch = self.selectionFlagPrefix + '_' + selectionName
    config.addOutputVar (containerName, selectionDecoration, outputBranch)
makeSelectionSummaryAlg(self, config, containerName, selectionName)
STL class.
std::vector< std::string > intersection(std::vector< std::string > &v1, std::vector< std::string > &v2)
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:310