ATLAS Offline Software
Loading...
Searching...
No Matches
ConfigBlock.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3import textwrap
4import inspect
5from functools import wraps
6import warnings
7
8from AnaAlgorithm.Logging import logging
9logCPAlgCfgBlock = logging.getLogger('CPAlgCfgBlock')
10
11from AnalysisAlgorithmsConfig.ConfigAccumulator import DataType, ExpertModeWarning
12import re
13
def filter_dsids(filterList, config):
    """Check whether the sample being run passes a possible DSID filter.

    Args:
        filterList: DSID filters attached to a block. Each entry is either
            an exact DSID (int or string) or a regular expression; an entry
            is treated as a regular expression as soon as its string form
            contains a regex metacharacter.
        config: object whose ``dsid()`` method returns the DSID of the
            sample being processed.

    Returns:
        True if the filter list is empty or at least one entry accepts the
        DSID of the sample, False otherwise.
    """
    if not filterList:
        # No filter configured: every sample passes.
        return True
    # The DSID is the same for every entry, so convert it only once.
    dsid = str(config.dsid())
    for dsid_filter in filterList:
        # Work on the string form so that non-string entries never reach 're'.
        entry = str(dsid_filter)
        if any(char in entry for char in "^$*+?.()|[]{}\\"):
            # Regular expression: matched from the start of the DSID only.
            if re.match(entry, dsid):
                return True
        elif entry == dsid:
            # Exact DSID, compared as strings so 410470 and "410470" agree.
            return True
    return False
30
def alphanumeric_block_name(func):
    """Decorate an ``instanceName()`` implementation to sanitise its result.

    The name returned by a config block may contain a selection expression
    (e.g. when 'selectionName' is used in the naming). The wrapper rewrites
    the logical operators and parentheses of such an expression and removes
    spaces. Any other character is passed through unchanged, and a ``None``
    result is turned into an empty string.
    """
    # Allowed replacements, applied in this order - anything else is likely
    # a mistake on the user-side
    replacements = (("||", "OR"), ("&&", "AND"), ("(", "LB"), (")", "RB"), (" ", ""))

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Get the string returned by the wrapped 'instanceName()' method
        name = func(*args, **kwargs)
        if name is None:
            return ""
        for old, new in replacements:
            name = name.replace(old, new)
        return name
    return wrapper
48
class BlockNameProcessorMeta(type):
    """Metaclass that applies 'alphanumeric_block_name()' to 'instanceName()'.

    Every class created through this metaclass that defines its own
    'instanceName' method gets that method wrapped automatically. It is used
    by the main ConfigBlock class so that the rule also propagates to all
    derived classes (the individual config blocks).
    """

    def __new__(cls, name, bases, dct):
        # Only wrap a method defined in this very class body; inherited
        # implementations were already wrapped when their class was created.
        method = dct.get('instanceName')
        if callable(method):
            dct['instanceName'] = alphanumeric_block_name(method)
        return super().__new__(cls, name, bases, dct)
58
class ConfigBlockOption:
    """The information for a single option on a configuration block."""

    def __init__(self, type=None, info='', noneAction='ignore', required=False,
                 default=None):
        # NOTE: 'type' shadows the builtin, but it is part of the keyword
        # interface used when options are declared, so the name must stay.
        self.type = type              # expected Python type, None for "unchecked"
        self.info = info              # description text of the option
        self.required = required      # flag stored as given by the caller
        self.noneAction = noneAction  # what to do when the option is set to None
        self.default = default        # default value recorded at declaration
69
70
71
class ConfigBlockDependency():
    """Class encoding a block's dependence on another block."""

    def __init__(self, blockName, required=True):
        self.blockName = blockName  # name of the block that is depended upon
        self.required = required    # flag stored as given by the caller

    def __eq__(self, name):
        """Compare against a block name, so a dependency equals the name it refers to."""
        return self.blockName == name

    # Defining __eq__ already removes the inherited __hash__; spell it out so
    # that the unhashability of dependencies is visible to the reader.
    __hash__ = None

    def __str__(self):
        return self.blockName

    def __repr__(self):
        return f'ConfigBlockDependency(blockName="{self.blockName}", required={self.required})'
90
91
92class ConfigBlock(metaclass=BlockNameProcessorMeta):
93 """the base class for classes implementing individual blocks of
94 configuration
95
96 A configuration block is a sequence of one or more algorithms that
97 should always be scheduled together, e.g. the muon four momentum
98 corrections could be a single block, muon selection could then be
99 another block. The blocks themselves generally have their own
100 configuration options/properties specific to the block, and will
101 perform a dynamic configuration based on those options as well as
102 the overall job.
103
104 The actual configuration of the algorithms in the block will
105 depend on what other blocks are scheduled before and afterwards,
106 most importantly some algorithms will introduce shallow copies
107 that subsequent algorithms will need to run on, and some
108 algorithms will add selection decorations that subsequent algorithms
109 should use as preselections.
110
111 The algorithms get created in a multi-step process (that may be
112 extended in the future): As a first step each block retrieves
113 references to the containers it uses (essentially marking its spot
114 in the processing chain) and also registering any shallow copies
115 that will be made. In the second/last step each block then
116 creates the fully configured algorithms.
117
118 One goal is that when the algorithms get created they will have
119 their final configuration and there needs to be no
120 meta-configuration data attached to the algorithms, essentially an
121 inversion of the approach in AnaAlgSequence in which the
122 algorithms got created first with associated meta-configuration
123 and then get modified in subsequent configuration steps.
124
125 For now this is mostly an empty base class, but another goal of
126 this approach is to make it easier to build another configuration
127 layer on top of this one, and this class will likely be extended
128 and get data members at that point.
129
130 The child class needs to implement the method `makeAlgs` which is
131 given a single `ConfigAccumulator` type argument. This is meant to
132 create the sequence of algorithms that this block configures. This
133 is currently (28 Jul 2025) called twice and should do the same thing
134 during both calls, but the plan is to change that to a single call.
135
136 The child class should also implement the method `instanceName`
137 which should return a string that is used to distinguish between
138 multiple instances of the same block. This is used to append the
139 instance name to the names of all algorithms created by this block,
140 and may in the future also be used to distinguish between multiple
141 instances of the block.
142 """
143
144 # Class-level dictionary to keep track of instance counts for each derived class
145 instance_counts = {}
146
# Constructor: creates the option bookkeeping dictionaries, declares the
# options shared by every block (groupName, skipOnData, skipOnMC,
# onlyForDSIDs, propertyOverrides) and updates ConfigBlock.instance_counts.
147 def __init__ (self) :
148 self._blockName = ''
149 self._factoryName = None
# NOTE(review): this listing jumps from line 149 to 151. 'self._dependencies'
# is used by addDependency() but is not initialised in any visible line -
# presumably that happens on the missing line 150; confirm against the file.
151 self._options = {} # used with block configuration to set arbitrary option
152 self._expertModeSettings = {} # dictionary to track expert mode requirements for each option
153 self.addOption('groupName', '', type=str,
154 info=('Used to specify this block when setting an'
155 ' option at an arbitrary location.'))
156 self.addOption('skipOnData', False, type=bool,
157 info=('User option to prevent the block from running'
158 ' on data. This only affects blocks that are'
159 ' intended to run on data.'))
160 self.addOption('skipOnMC', False, type=bool,
161 info=('User option to prevent the block from running'
162 ' on MC. This only affects blocks that are'
163 ' intended to run on MC.'))
164 self.addOption('onlyForDSIDs', [], type=list,
165 info=('Used to specify which MC DSIDs to allow this'
166 ' block to run on. Each element of the list'
167 ' can be a full DSID (e.g. 410470), or a regex'
168 ' (e.g. 410.* to select all 410xxx DSIDs, or'
169 ' ^(?!410) to veto them). An empty list means no'
170 ' DSID restriction.'))
171 self.addOption('propertyOverrides', {}, type=None,
172 info=('EXPERT USE ONLY: A dictionary of properties to'
173 ' override at the end of configuration. This should'
174 ' take the form'
175 ' {"algName.toolName.propertyName": value, ...},'
176 ' without any automatically applied postfixes for'
177 ' the algorithm name. THIS IS MEANT TO BE EXPERT'
178 ' USAGE ONLY. Properties that need to be set by'
179 ' the user should be declared as options on the'
180 ' block itself. EXPERT USE ONLY!'),
181 expertMode=True)
182 # Increment the instance count for the current class
183 cls = type(self) # Get the actual class of the instance (also derived!)
184 if cls not in ConfigBlock.instance_counts:
185 ConfigBlock.instance_counts[cls] = 0
186 # Note: we do need to check in the call stack that we are
187 # in a real makeConfig situation, and not e.g. printAlgs
188 stack = inspect.stack()
# The loop walks the call stack looking at frames whose local 'self' is
# missing or is not a ConfigBlock, and counts the instance when such a frame
# belongs to a function called "makeConfig".
# NOTE(review): indentation was lost in this listing, so whether 'break' sits
# inside the "makeConfig" check or one level further out cannot be told from
# here - confirm against the original file before touching this loop.
189 for frame_info in stack:
190 # Get the class name (if any) from the frame
191 parent_cls = frame_info.frame.f_locals.get('self', None)
192 if parent_cls is None or not isinstance(parent_cls, ConfigBlock):
193 # If the frame does not belong to an instance of ConfigBlock, it's an external caller
194 if frame_info.function == "makeConfig":
195 ConfigBlock.instance_counts[cls] += 1
196 break
197
198
def setBlockName(self, name):
    """Store `name` as the block name of this instance."""
    self._blockName = name
202
def getBlockName(self):
    """Return the block name stored on this instance."""
    return self._blockName
206
def factoryName(self):
    """Return an identifier for the type of this block.

    The result is mainly intended for output and error messages, so no
    specific format should be assumed. The lookup order is: the factory
    name (normally set by the factory from the requested block name), then
    the block name, then the name of the class.
    """
    # Walk the candidates in priority order; the second attribute is only
    # read when the first one is unset.
    for attribute in ('_factoryName', '_blockName'):
        candidate = getattr(self, attribute)
        if not (candidate is None or candidate == ''):
            return candidate
    # Nothing usable was set: fall back to the class name
    return self.__class__.__name__
224
def setFactoryName(self, name):
    """Store the factory name of this block.

    The factory is expected to call this automatically with the requested
    block name. When a block is created without a factory, the method can
    be called by hand instead.
    """
    self._factoryName = name
233
def instanceName(self):
    """Return the name that identifies this instance of the block.

    The instance name tells multiple instances of the same block apart and,
    most importantly, gets appended to the names of all algorithms the
    block creates. Block implementations should override this method and
    build the name from the identifying options of the instance, typically
    the (main) container name, possibly followed by the selection or
    working point.

    This base implementation returns an empty string, which is kept for
    backward compatibility (28 Jul 25); ideally every block overrides it.
    """
    return ''
250
def isUsedForConfig(self, config):
    """Tell whether this block takes part in the given configuration.

    `ConfigSequence` uses the answer to decide whether the block is
    included. The block is dropped when `skipOnData` is set and the job
    runs on data, when `skipOnMC` is set and the job does not run on data,
    or when the sample fails the `onlyForDSIDs` filter.
    """
    # The data type is only queried when the corresponding skip flag is set
    if self.skipOnData and config.dataType() is DataType.Data:
        return False
    if self.skipOnMC and not (config.dataType() is DataType.Data):
        return False
    # What remains is decided by the DSID filter alone
    return bool(filter_dsids(self.onlyForDSIDs, config))
265
def applyConfigOverrides(self, config):
    """Apply the overrides listed in the `propertyOverrides` option.

    Every key has the form "algName[.toolName...].propertyName". The
    algorithm is looked up through `config.getAlgorithm`, the tool names
    are followed as attributes, and the final name is set to the given
    value. This is meant to run at the end of the configuration process,
    once all algorithms have been created and configured.

    Raises:
        Exception: if a key has fewer than two components, the algorithm is
            not known to `config`, or a tool along the path does not exist.
    """
    for key, value in self.propertyOverrides.items():
        # "alg.tool.prop" -> algorithm name, tool path, property name
        parts = key.split('.')
        if len(parts) < 2:
            raise Exception(f"Invalid override key format: {key}")
        algName, toolPath, propName = parts[0], parts[1:-1], parts[-1]

        target = config.getAlgorithm(algName)
        if target is None:
            raise Exception(f"Algorithm {algName} not found in config for override: {key}")

        # Descend through the tools, if there are any
        for toolName in toolPath:
            if not hasattr(target, toolName):
                raise Exception(f"Tool {toolName} not found for override: {key}")
            target = getattr(target, toolName)

        # Kept as an explicit __setattr__ call: according to the original
        # author plain `setattr` did not work here.
        target.__setattr__(propName, value)
290
def addDependency(self, dependencyName, required=True):
    """Register a dependency of this block on another block.

    `dependencyName` corresponds to the blockName of the other block. With
    `required=True` a missing dependency is an error; with
    `required=False` a missing dependency is ignored. In both cases, if the
    other block is present this block is moved after it.

    The first dependency added also declares the `ignoreDependencies`
    option on the block.
    """
    firstDependency = not self.hasDependencies()
    if firstDependency:
        # add option to let the block ignore dependencies
        self.addOption('ignoreDependencies', [], type=list,
                       info='List of dependencies defined in the ConfigBlock to ignore.')
    dependency = ConfigBlockDependency(dependencyName, required)
    self._dependencies.append(dependency)
305
def hasDependencies(self):
    """Return True if at least one dependency has been added to this block."""
    return bool(self._dependencies)
309
311 """Return the list of dependencies. """
312 return self._dependencies
313
def addOption(self, name, defaultValue, *,
              type, info='', noneAction='ignore', required=False, expertMode=None):
    """Declare the given option on the configuration block.

    This should only be called in the constructor of the configuration
    block.

    NOTE: The backend to option handling is slated to be replaced at some
    point. This particular function should essentially stay the same, but
    some behavior may change.

    Args:
        name: option name; it also becomes an attribute of the block.
        defaultValue: initial value of the option.
        type: one of str, bool, int, float, list, or None for no type check.
        info: description text of the option.
        noneAction: 'error', 'set' or 'ignore'; what to do when the option
            is later set to None.
        required: flag recorded with the option.
        expertMode: None for a regular option, True to flag any deviation
            from the default as expert-only, or a list of expert-only
            values/markers/predicates.

    Raises:
        KeyError: if an option of that name was already declared.
        TypeError: if `type` is not supported, or `expertMode` is neither
            None, True nor a list.
        ValueError: if `noneAction` is not one of the allowed values.
    """
    if name in self._options:
        raise KeyError(f'duplicate option: {name}')
    if type not in [str, bool, int, float, list, None]:
        raise TypeError(f'unknown option type: {type}')
    noneActions = ['error', 'set', 'ignore']
    if noneAction not in noneActions:
        raise ValueError(f'invalid noneAction: {noneAction} [allowed values: {noneActions}]')

    # Store expert mode settings if provided
    if expertMode is not None:
        if expertMode is True:
            # in this case we will just check against the default value
            self._expertModeSettings[name] = True
        elif not isinstance(expertMode, list):
            # The 'type' parameter shadows the builtin inside this method,
            # so the class has to be read from the object itself.
            raise TypeError(f'expertMode must be a list, got {expertMode.__class__}')
        else:
            # here we will check against a list of custom values
            self._expertModeSettings[name] = expertMode

    setattr(self, name, defaultValue)
    self._options[name] = ConfigBlockOption(type=type, info=info,
        noneAction=noneAction, required=required, default=defaultValue)
347
348
def setOptionValue(self, name, value):
    """Set the given option on the configuration block.

    NOTE: The backend to option handling is slated to be replaced at some
    point. This particular function should essentially stay the same, but
    some behavior may change.

    A value of None is handled according to the option's noneAction:
    'ignore' leaves the option untouched, 'set' stores None and 'error'
    raises. Any other value must have exactly the declared type (if one was
    declared); as the only exception an int is converted for float options.

    Raises:
        KeyError: if no option of that name was declared.
        ValueError: if the value has the wrong type, or None is passed to
            an option with noneAction 'error'.
    """
    if name not in self._options:
        raise KeyError(f'unknown option "{name}" in block "{self.__class__.__name__}"')
    option = self._options[name]

    if value is None:
        if option.noneAction == 'error':
            raise ValueError(f'passed None for setting option {name} with noneAction=error')
        if option.noneAction == 'set':
            # None is stored as-is: running the type check on it would
            # reject None for every typed option and make 'set' unusable.
            setattr(self, name, None)
        # 'ignore' (and anything else) leaves the current value in place
        return

    # check type if specified
    optType = option.type
    # convert int to float to prevent crash
    if optType is float and type(value) is int:
        value = float(value)
    if optType is not None and optType != type(value):
        raise ValueError(f'{name} for block {self.__class__.__name__} should '
                         f'be of type {optType} not {type(value)}')
    setattr(self, name, value)
374
375
def getOptionValue(self, name):
    """Return the current value of option `name`, or None if no such option was declared."""
    return getattr(self, name) if name in self._options else None
380
381
def getOptions(self):
    """Return a shallow copy of the dictionary of options declared on the block."""
    snapshot = self._options.copy()
    return snapshot
385
386
def printOptions(self, verbose=False, width=60, indent=" "):
    """Log the options of this block together with their current values.

    Args:
        verbose: if True, additionally log type, default, required flag,
            noneAction and the wrapped info text of every option.
        width: maximum line width used when wrapping the info text.
        indent: string used as one level of indentation in the output.
    """
    def printWrap(text, width=60, indent=" "):
        wrapper = textwrap.TextWrapper(width=width, initial_indent=indent,
                                       subsequent_indent=indent)
        for line in wrapper.wrap(text=text):
            logCPAlgCfgBlock.info(line)

    for opt, vals in self.getOptions().items():
        if verbose:
            logCPAlgCfgBlock.info(indent + f"\033[4m{opt}\033[0m: {self.getOptionValue(opt)}")
            logCPAlgCfgBlock.info(indent*2 + f"\033[4mtype\033[0m: {vals.type}")
            logCPAlgCfgBlock.info(indent*2 + f"\033[4mdefault\033[0m: {vals.default}")
            logCPAlgCfgBlock.info(indent*2 + f"\033[4mrequired\033[0m: {vals.required}")
            logCPAlgCfgBlock.info(indent*2 + f"\033[4mnoneAction\033[0m: {vals.noneAction}")
            # Forward the requested width; without it the wrapper always
            # fell back to its own default and the argument had no effect.
            printWrap(f"\033[4minfo\033[0m: {vals.info}", width=width, indent=indent*2)
        else:
            logCPAlgCfgBlock.info(indent + f"{opt}: {self.getOptionValue(opt)}")
407
408
def hasOption(self, name):
    """Tell whether an option called `name` was declared on this block.

    WARNING: The backend to option handling is slated to be replaced at
    some point. This particular function may change behavior, interface or
    be removed/replaced entirely.
    """
    declared = self._options
    return name in declared
417
418
def __eq__(self, blockName):
    """Compare this block against a block name.

    Implements the == operator so that a block can be searched for by name
    in a config sequence, e.g. `if blockName in configSeq:`.
    """
    return self._blockName == blockName
425
426
def __str__(self):
    """Return the block name as the string form of the block."""
    return self._blockName
429
430
431 @classmethod
433 # Access the current count for this class
434 return ConfigBlock.instance_counts.get(cls, 0)
435
def _is_expert_value(self, rule, value):
    """Check whether `value` matches an expert mode rule.

    The rule can be:
      - a callable predicate, which is called with the value and whose
        result is returned as-is;
      - one of the marker strings "nonemptystring", "nonemptylist" or
        "positiveint", which stand for common predicates;
      - any other object, which is compared to the value with ==.
    """
    if callable(rule):
        return rule(value)

    if isinstance(rule, str):
        if rule == "nonemptystring":
            return isinstance(value, str) and value != ""
        if rule == "nonemptylist":
            return isinstance(value, list) and value != []
        if rule == "positiveint":
            # bool is a subclass of int, so True would otherwise pass as a
            # positive integer.
            return (isinstance(value, int) and not isinstance(value, bool)
                    and value > 0)

    # Fallback: direct value comparison (also for non-marker strings)
    return value == rule
457
def checkExpertSettings(self, config):
    """Flag every option whose current value is reserved for expert mode.

    For options declared with `expertMode=True` any value different from
    the default is flagged; for options declared with a list of rules each
    matching rule is flagged. Flagging means issuing an `ExpertModeWarning`
    through `warnings.warn`; nothing is raised here directly, and the
    `config` argument is not used by this implementation.
    """
    for optName, rule in self._expertModeSettings.items():
        value = self.getOptionValue(optName)
        default = self._options[optName].default

        if rule is True:
            # Any deviation from the default requires expert mode
            if value != default:
                warnings.warn(
                    f"Block '{self.factoryName()}' option '{optName}' "
                    f"set to '{value}' (default '{default}'), "
                    f"requires expert mode.",
                    ExpertModeWarning, stacklevel=2
                )
            continue

        # Otherwise the rule is a list of expert values/markers/predicates;
        # every entry that matches produces its own warning.
        for candidate in rule:
            if not self._is_expert_value(candidate, value):
                continue
            warnings.warn(
                f"Block '{self.factoryName()}' option '{optName}' "
                f"set to expert-only value '{value}'. "
                f"Requires expert mode.",
                ExpertModeWarning, stacklevel=2
            )
    # All options were inspected
    return None
__init__(self, blockName, required=True)
__init__(self, type=None, info='', noneAction='ignore', required=False, default=None)
_is_expert_value(self, rule, value)
printOptions(self, verbose=False, width=60, indent=" ")
setOptionValue(self, name, value)
addDependency(self, dependencyName, required=True)
addOption(self, name, defaultValue, *, type, info='', noneAction='ignore', required=False, expertMode=None)
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:310
alphanumeric_block_name(func)
filter_dsids(filterList, config)