ATLAS Offline Software
Loading...
Searching...
No Matches
OutputAnalysisConfig.py
Go to the documentation of this file.
1# Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
2
3# AnaAlgorithm import(s):
4from AnalysisAlgorithmsConfig.ConfigBlock import ConfigBlock
5from AnalysisAlgorithmsConfig.ConfigAccumulator import DataType
6from AnalysisAlgorithmsConfig.ConfigBlock import filter_dsids
7from AthenaCommon.Logging import logging
8import copy, re
9
10class OutputAnalysisConfig (ConfigBlock):
11 """the ConfigBlock for the output ntuple configuration"""
12
def __init__ (self) :
    """Declare all user-facing options of the output block.

    Every option is registered through `addOption` with its default
    value, its expected type and a help text.  `validated` is a plain
    attribute (not an option): `makeAlgs` checks it so that the one-off
    transformations of the options are only applied once.
    """
    super (OutputAnalysisConfig, self).__init__ ()
    self.addOption ('postfix', '', type=str,
        info="a postfix to apply to decorations and algorithm names. "
        "Typically not needed here.")
    self.addOption ('vars', [], type=list,
        info="a list of mappings (list of strings) between containers and "
        "decorations to output branches.")
    self.addOption ('varsOnlyForMC', [], type=list,
        info="same as `vars`, but for MC-only variables so as to avoid a "
        "crash when running on data.")
    self.addOption ('metVars', [], type=list,
        info="a list of mappings (list of strings) between containers "
        "and decorations to output branches. Specifically for MET "
        "variables, where only the final MET term is retained.")
    self.addOption ('truthMetVars', [], type=list,
        info="a list of mappings (list of strings) between containers "
        "and decorations to output branches for truth MET.")
    self.addOption ('containers', {}, type=dict,
        info="a dictionary mapping prefixes (key) to container names "
        "(values) to be used when saving to the output tree. Branches "
        "are then of the form `prefix_decoration`.")
    self.addOption ('containersFullMET', {}, type=dict,
        info="same as `containers`, but for MET containers that should be "
        "saved with all terms (as opposed to just the final term). This "
        "is useful for special studies. A container can appear both here and "
        "in containers (with different prefixes).")
    self.addOption ('containersOnlyForMC', {}, type=dict,
        info="same as `containers`, but for MC-only containers so as to avoid "
        "a crash when running on data.")
    self.addOption ('containersOnlyForDSIDs', {}, type=dict,
        info="specify which DSIDs are allowed to produce a given container. "
        "This works like `onlyForDSIDs`: pass a list of DSIDs or regexps.")
    self.addOption ('nonContainers', [], type=list,
        info="a list of container names that are not actual containers but should be treated as non-containers.")
    self.addOption ('treeName', 'analysis', type=str,
        info="name of the output TTree to save.")
    self.addOption ('streamName', 'ANALYSIS', type=str,
        info="name of the output stream to save the tree in.")
    self.addOption ('metTermName', 'Final', type=str,
        info="the name of the MET term to save, turning the MET "
        "container into a single object.")
    self.addOption ('truthMetTermName', 'NonInt', type=str,
        info="the name of the truth MET term to save, turning the MET "
        "container into a single object.")
    self.addOption ('storeSelectionFlags', True, type=bool,
        info="whether to store one branch for each object selection.")
    self.addOption ('selectionFlagPrefix', 'select', type=str,
        info="the prefix used when naming selection branches.")
    # the help text lists every keyword that makeAlgs actually parses
    self.addOption ('commands', [], type=list,
        info="a list of strings containing commands (regexp strings "
        "prefaced by the keywords `enable`, `disable` or `rename`) to turn on/off "
        "or rename branches in the output ntuple. `rename` takes the regexp "
        "followed by the replacement string. Any command can be preceded by "
        "`optional` so that a pattern matching no branch is not an error. "
        "If left empty, do not modify the scheduled output branches.")
    self.addOption ('commandsOnlyForDSIDs', {}, type=dict,
        info="a dictionary with individual DSIDs as keys, and a list of strings "
        "like for the `commands` option as items. These `commands` will only be run "
        "for the corresponding DSID.")
    self.addOption ('alwaysAddNosys', False, type=bool,
        info="If set to `True`, all branches will be given a systematics suffix, "
        "even if they have no systematics (beyond the nominal).")
    self.addOption ('skipRedundantSelectionFlags', True, type=bool,
        info="remove the redundant 'outputSelect' branches created by the Thinning step. "
        "These could however be used to simplify downstream workflows, as in Easyjet. "
        "The default is True.")
    self.addOption ('outputFormat', 'TTree', type=str,
        info="The output format. The default is 'TTree'.")
    self.addOption ('defaultBasketSize', None, type=int,
        info="default basket size for all branches in the output tree. "
        "If not set (the default), no basket size is configured and ROOT's "
        "default will be used.")
    # helper to protect for second pass
    self.validated = False
86
def instanceName (self) :
    """Name of this block instance: the postfix when one is set, the tree name otherwise."""
    # an unset (None) or empty postfix means we fall back to the tree name
    if self.postfix is None or self.postfix == '':
        return self.treeName
    return self.postfix
92
93 @staticmethod
94 def branchSortOrder (rule):
95 return rule.split('->')[1].strip()
96
def createOutputAlgs (self, config, name, vars):
    """Create and configure one CP::AsgxAODNTupleMakerAlg.

    `name` is the algorithm name and `vars` an iterable of branch rules.
    The rules are ordered with `branchSortOrder`, and all rules without
    "%SYS%" are placed ahead of those containing it.  The basket size is
    only set on the algorithm when `defaultBasketSize` is not None.
    Returns the configured algorithm.
    """
    ntupleAlg = config.createAlgorithm('CP::AsgxAODNTupleMakerAlg', name)
    ntupleAlg.TreeName = self.treeName
    ntupleAlg.RootStreamName = self.streamName
    ntupleAlg.NonContainers = list(self.nonContainers)
    # one stable sort: first on "contains %SYS%", then on the branch sort key
    ntupleAlg.Branches = sorted(
        vars, key=lambda rule: ("%SYS%" in rule, self.branchSortOrder(rule)))
    if self.defaultBasketSize is not None:
        ntupleAlg.DefaultBasketSize = self.defaultBasketSize
    return ntupleAlg
111
112 def makeAlgs (self, config) :
113
114 log = logging.getLogger('OutputAnalysisConfig')
115
116 # do some transformations of the options we should only do once
117 if not self.validated:
118
119 self.containers = dict(self.containers)
120 self.vars = set(self.vars)
122 self.metVars = set(self.metVars)
124
125 # check for overlaps between containers and containersFullMET
126 overlapping_keys = set(self.containers.keys()).intersection(self.containersFullMET.keys())
127 if overlapping_keys:
128 # convert the set of overlapping keys to a list of strings for the message (represents the empty string too!)
129 keys_message = [repr(key) for key in overlapping_keys]
130 raise KeyError(f"containersFullMET would overwrite the following container keys: {', '.join(keys_message)}")
131 # move items in self.containersFullMET to containers
133
134 # merge the MC-specific branches and containers into the main list/dictionary only if we are not running on data
135 if config.dataType() is not DataType.Data:
136 self.vars |= self.varsOnlyForMC
137
138 # protect 'containers' against being overwritten
139 # find overlapping keys
140 overlapping_keys = set(self.containers.keys()).intersection(self.containersOnlyForMC.keys())
141 if overlapping_keys:
142 # convert the set of overlapping keys to a list of strings for the message (represents the empty string too!)
143 keys_message = [repr(key) for key in overlapping_keys]
144 raise KeyError(f"containersOnlyForMC would overwrite the following container keys: {', '.join(keys_message)}")
145
146 # move items in self.containersOnlyForMC to self.containers
148 # clear the dictionary to avoid overlapping key error during the second pass
150
151 # now filter the containers depending on DSIDs
153 for container, dsid_filters in self.containersOnlyForDSIDs.items():
154 if container not in self.containers:
155 log.warning("Skipping unrecognised container prefix '%s' for DSID-filtering in OutputAnalysisConfig...", container)
156 continue
157 if not filter_dsids (dsid_filters, config):
158 # if current DSID is not allowed for this container, remove it
159 log.info("Skipping container prefix '%s' due to DSID filtering...", container)
160 # filter branches for validated containers
161 for var in set(self.vars): # make a copy of the list to avoid modifying it while iterating
162 var_container = var.split('.')[0].replace('_NOSYS', '').replace('_%SYS%', '')
163 if var_container == self.containers[container]:
164 self.vars.remove(var)
165 log.info("Skipping branch definition '%s' for excluded container %s...", var, var_container)
166 # filter branches for MET variables
167 for var in set(self.metVars): # make a copy of the list to avoid modifying it while iterating
168 var_container = var.split('.')[0].replace('_NOSYS', '').replace('_%SYS%', '')
169 if var_container == self.containers[container]:
170 self.metVars.remove(var)
171 log.info("Skipping MET branch definition '%s' for excluded container %s...", var, var_container)
172 # filter branches for truth MET variables
173 for var in set(self.truthMetVars): # make a copy of the list to avoid modifying it while iterating
174 var_container = var.split('.')[0].replace('_NOSYS', '').replace('_%SYS%', '')
175 if var_container == self.containers[container]:
176 self.truthMetVars.remove(var)
177 log.info("Skipping truth MET branch definition '%s' for excluded container %s...", var, var_container)
178 # remove the container from the list at the end
179 self.containers.pop (container)
180 # clear the dictionary to avoid warnings during the second pass
182
183 for prefix, container in self.containers.items():
184 origName = config.getOutputContainerOrigin(container)
185 if config.getContainerMeta(origName, "nonContainer", False):
186 self.nonContainers.append(origName)
187
188 # at this point we are OK
189 self.validated = True
190
192 self.createSelectionFlagBranches(config)
193
194 outputConfigs = {}
195 for prefix in self.containers.keys() :
196 containerName = self.containers[prefix]
197 outputDict = config.getOutputVars (containerName)
198 for outputName in outputDict :
199 outputConfig = copy.deepcopy (outputDict[outputName])
200 if containerName != outputConfig.origContainerName or config.checkOutputContainer(containerName):
201 outputConfig.outputContainerName = containerName + '_%SYS%'
202 else:
203 outputConfig.outputContainerName = config.readName(containerName)
204 outputConfig.prefix = prefix
205 # if the container is a MET container with all terms, we
206 # also need to write out the name of each MET term
207 if prefix in self.containersFullMET and outputConfig.variableName == 'name':
208 outputConfig.enabled = True
209 outputConfigs[prefix + outputName] = outputConfig
210
211 # check for DSID-specific commands
212 for dsid, dsid_commands in self.commandsOnlyForDSIDs.items():
213 if filter_dsids([dsid], config):
214 self.commands += dsid_commands
215
216 outputConfigsRename = {}
217 for command in self.commands :
218 words = command.split (' ')
219 if len (words) == 0 :
220 raise ValueError ('received empty command for "commands" option')
221 optional = words[0] == 'optional'
222 if optional :
223 words = words[1:] # remove the 'optional' keyword
224 if words[0] == 'enable' :
225 if len (words) != 2 :
226 raise ValueError ('enable takes exactly one argument: ' + command)
227 used = False
228 for name in outputConfigs :
229 if re.match (words[1], name) :
230 outputConfigs[name].enabled = True
231 used = True
232 if not used and not optional and config.dataType() is not DataType.Data:
233 raise KeyError ('unknown branch pattern for enable: ' + words[1])
234 elif words[0] == 'disable' :
235 if len (words) != 2 :
236 raise ValueError ('disable takes exactly one argument: ' + command)
237 used = False
238 for name in outputConfigs :
239 if re.match (words[1], name) :
240 outputConfigs[name].enabled = False
241 used = True
242 if not used and not optional and config.dataType() is not DataType.Data:
243 raise KeyError ('unknown branch pattern for disable: ' + words[1])
244 elif words[0] == 'rename' :
245 if len (words) != 3 :
246 raise ValueError ('rename takes exactly two arguments: ' + command)
247 used = False
248 for name in outputConfigs :
249 if re.match (words[1], name) :
250 new_name = re.sub (words[1], words[2], name)
251 outputConfigsRename[new_name] = copy.deepcopy(outputConfigs[name])
252 outputConfigs[name].enabled = False
253 used = True
254 if not used and not optional and config.dataType() is not DataType.Data:
255 raise KeyError ('unknown branch pattern for rename: ' + words[1])
256 else :
257 raise KeyError ('unknown command for "commands" option: ' + words[0])
258
259 # update the outputConfigs with renamed branches
260 outputConfigs.update(outputConfigsRename)
261
262 autoVars = set()
263 autoMetVars = set()
264 autoTruthMetVars = set()
265 for outputName, outputConfig in outputConfigs.items():
266 if outputConfig.enabled :
267 if config.isMetContainer (outputConfig.origContainerName) and outputConfig.prefix not in self.containersFullMET:
268 if "Truth" in outputConfig.origContainerName:
269 myVars = autoTruthMetVars
270 else:
271 myVars = autoMetVars
272 else :
273 myVars = autoVars
274 if outputConfig.noSys :
275 outputConfig.outputContainerName = outputConfig.outputContainerName.replace ('%SYS%', 'NOSYS')
276 outputConfig.variableName = outputConfig.variableName.replace ('%SYS%', 'NOSYS')
278 outputName += "_NOSYS"
279 else :
280 outputName += '_%SYS%'
281 branchDecl = f"{outputConfig.outputContainerName}.{outputConfig.variableName} -> {outputName}"
282 if outputConfig.auxType is not None :
283 branchDecl += f" type={outputConfig.auxType}"
284 if config.isMetContainer (outputConfig.origContainerName) and outputConfig.prefix not in self.containersFullMET:
285 if "Truth" in outputConfig.origContainerName:
286 branchDecl += f" metTerm={self.truthMetTermName}"
287 else:
288 branchDecl += f" metTerm={self.metTermName}"
289 myVars.add(branchDecl)
290
291 # Unified branch collection for all output formats
292 allBranches = set()
293 allBranches |= self.vars
294 allBranches |= autoVars
295 # Add MET branches
296 userMetVars = set()
297 if self.metVars:
298 for var in self.metVars:
299 userMetVars.add(var + " metTerm=" + self.metTermName)
300 allBranches |= userMetVars
301 allBranches |= autoMetVars
302 # Add truth MET branches (for MC)
303 userTruthMetVars = set()
304 if config.dataType() is not DataType.Data:
305 if self.truthMetVars:
306 for var in self.truthMetVars:
307 userTruthMetVars.add(var + " metTerm=" + self.truthMetTermName)
308 allBranches |= userTruthMetVars
309 allBranches |= autoTruthMetVars
310
311 # Create the output algorithm based on outputFormat
312 if self.outputFormat == 'RNTuple':
313 alg = config.createAlgorithm('CP::RNtupleTreeMakerAlg', 'RNtupleMaker')
314 alg.TreeName = self.treeName
315 alg.RootStreamName = self.streamName
316 alg.OutputStreamName = self.streamName
317 alg.NonContainers = list(self.nonContainers)
318
319 branchList = list(allBranches)
320 branchList.sort(key=self.branchSortOrder)
321 alg.Branches = branchList
322
323 return
324
325 # Add an ntuple dumper algorithm:
326 treeMaker = config.createAlgorithm( 'CP::TreeMakerAlg', 'TreeMaker' )
327 treeMaker.TreeName = self.treeName
328 treeMaker.RootStreamName = self.streamName
329 # the auto-flush setting still needs to be figured out
330 #treeMaker.TreeAutoFlush = 0
331
332 if self.vars or autoVars:
333 self.createOutputAlgs(config, 'NTupleMaker', self.vars | autoVars)
334
335 if self.metVars or autoMetVars:
336 self.createOutputAlgs(config, 'MetNTupleMaker', userMetVars | autoMetVars)
337
338 if config.dataType() is not DataType.Data and (self.truthMetVars or autoTruthMetVars):
339 self.createOutputAlgs(config, 'TruthMetNTupleMaker', userTruthMetVars | autoTruthMetVars)
340
341 treeFiller = config.createAlgorithm( 'CP::TreeFillerAlg', 'TreeFiller' )
342 treeFiller.TreeName = self.treeName
343 treeFiller.RootStreamName = self.streamName
344
345
346
348 """
349 For each container and for each selection, create a single pass variable in output NTuple,
350 which aggregates all the selections flag of the given selection. For example, this can include
351 pT, eta selections, some object ID selection, overlap removal, etc.
352 The goal is to have only one flag per object and working point in the output NTuple.
353 """
354 originalContainersSeen = []
355 for prefix in self.containers.keys() :
356 outputContainerName = self.containers[prefix]
357 containerName = config.getOutputContainerOrigin(outputContainerName)
358 if containerName in originalContainersSeen:
359 continue
360 else:
361 originalContainersSeen.append(containerName)
362
363 # EventInfo is one obvious example of a container that has no object selections
364 if containerName == 'EventInfo':
365 continue
366
367 # Get the selection names, except the systematic-dependent version of the FTAG
368 # selection flag, as it's already saved as a systematic-independent output branch
369 selectionNames = config.getSelectionNames(containerName, excludeFrom={'ftag'})
370 for selectionName in selectionNames:
371 # skip default selection
372 if selectionName == '':
373 continue
374 # skip selection coming from the Thinning block
375 if self.skipRedundantSelectionFlags and "outputSelect" in selectionName:
376 continue
377 self.makeSelectionSummaryAlg(config, containerName, selectionName)
378
def makeSelectionSummaryAlg(self, config, containerName, selectionName):
    """
    Schedule one CP::AsgSelectionAlg whose preselection is the full selection
    `selectionName` of `containerName`, writing the result to a single
    decoration.  That decoration is registered as an output variable named
    `<selectionFlagPrefix>_<selectionName>`.
    """
    summaryDecoration = f'baselineSelection_{selectionName}_%SYS%'
    algName = f'ObjectSelectionSummary_{containerName}_{selectionName}'
    summaryAlg = config.createAlgorithm( 'CP::AsgSelectionAlg', algName )
    summaryAlg.selectionDecoration = f'{summaryDecoration},as_char'
    summaryAlg.particles = config.readName (containerName)
    summaryAlg.preselection = config.getFullSelection (containerName, selectionName)
    # output variable name: prefix and selection name joined by an underscore
    outputName = '_'.join ([self.selectionFlagPrefix, selectionName])
    config.addOutputVar (containerName, summaryDecoration, outputName)
void clear()
Empty the pool.
makeSelectionSummaryAlg(self, config, containerName, selectionName)
STL class.
std::vector< std::string > intersection(std::vector< std::string > &v1, std::vector< std::string > &v2)
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:312