ATLAS Offline Software
Loading...
Searching...
No Matches
ConfigText.py
Go to the documentation of this file.
1# Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
2#
3# @author Joseph Lambert
4
5import yaml
6import json
7import os
8import sys
9import importlib
10import pathlib
11import warnings
12
13from AnalysisAlgorithmsConfig.ConfigSequence import ConfigSequence
14from AnalysisAlgorithmsConfig.ConfigFactory import ConfigFactory
15from AnalysisAlgorithmsConfig.ConfigAccumulator import deprecationWarningCategory
16
17from AnaAlgorithm.DualUseConfig import isAthena
18from AnaAlgorithm.Logging import logging
19logCPAlgTextCfg = logging.getLogger('CPAlgTextCfg')
20
21
def readYaml(yamlPath):
    """Parse the YAML file at `yamlPath` and return its content.

    Raises ValueError when `yamlPath` does not point to an existing file.
    """
    if os.path.isfile(yamlPath):
        with open(yamlPath, 'r') as stream:
            return yaml.safe_load(stream)
    raise ValueError(f"{yamlPath} is not a file.")
29
30
def printYaml(d, sort=False, jsonFormat=False):
    """Write the dictionary `d` to stdout in YAML form.

    `sort` is forwarded as `sort_keys` and `jsonFormat` as
    `default_flow_style` to `yaml.dump`.
    """
    rendered = yaml.dump(d, default_flow_style=jsonFormat, sort_keys=sort)
    print(rendered)
34
class TextConfigWarning(FutureWarning):
    """Warning category for issues found in text configuration files."""
37
38
class TextConfig(ConfigFactory):
    """Build a ConfigSequence from a text (YAML file or dictionary) configuration.

    The configuration is kept in `self._config` as a nested dictionary keyed
    by block name. It can be filled from a YAML file or a dictionary
    (`loadConfig`), from python (`addBlock` / `setOptions`), or both, and is
    turned into a ConfigSequence by `configure`.
    """

    def __init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True):
        """
        yamlPath: optional path of a YAML file to load immediately
        config: optional configuration dictionary to load immediately
        addDefaultBlocks: if True, register the default blocks

        Raises ValueError if both `yamlPath` and `config` are given.
        """
        super().__init__(addDefaultBlocks=False)

        if yamlPath and config:
            raise ValueError("Cannot specify both yamlPath and config. Use one or the other.")

        # Block to add new blocks to this object
        self.addAlgConfigBlock(algName="AddConfigBlocks", alg=self._addNewConfigBlocks,
                               defaults={'self': self})
        # add default blocks
        if addDefaultBlocks:
            self.addDefaultAlgs()
        # load yaml
        self._config = {}
        # do not allow for loading multiple yaml files
        self.__loadedYaml = False
        if yamlPath is not None or config is not None:
            self.loadConfig(yamlPath, configDict=config)
        # last is used for setOptionValue when using addBlock
        self._last = None

    def setConfig(self, config):
        """
        Set the configuration dictionary directly.

        Raises ValueError if a non-empty configuration is already present.
        """
        if self._config:
            raise ValueError("Configuration has already been loaded.")
        self._config = config
        return

    # Less-than-ideal fix introduced in !76767
    def preprocessConfig(self, config, algs):
        """
        Preprocess the configuration dictionary.
        Blocks that define only sub-blocks (none of their keys is an option
        of the block) get a '__placeholder__' entry, so that they are not
        empty once their sub-blocks have been split off.
        """
        def processNode(node, algs):
            if not isinstance(node, dict):
                return  # Base case: not a dictionary
            # iterate over a snapshot, since entries of node are replaced below
            for blockName, blockContent in list(node.items()):
                # only names recognized in algs are blocks
                if blockName not in algs:
                    continue
                # If the block only defines sub-blocks, mark it
                if isinstance(blockContent, dict) and not any(
                    key in algs[blockName].options for key in blockContent
                ):
                    node[blockName] = {'__placeholder__': True, **blockContent}
                # Recurse into sub-blocks
                processNode(node[blockName], algs[blockName].subAlgs)

        # Start processing from the root of the configuration
        processNode(config, algs)

    # Less-than-ideal fix introduced in !76767
    def cleanupPlaceholders(self, config):
        """
        Remove '__placeholder__' markers from `config`, recursing into
        nested dictionaries (lists are not descended into).

        NOTE(review): loadConfig applies this to the dictionary it was
        given, while addBlock stores copies of the options in self._config,
        so markers can remain there; _configureAlg discards them explicitly.
        """
        if not isinstance(config, dict):
            return
        if "__placeholder__" in config:
            del config["__placeholder__"]
        for value in config.values():
            self.cleanupPlaceholders(value)

    def loadConfig(self, yamlPath=None, *, configDict=None):
        """
        Read a YAML file (or take `configDict` directly) and add its blocks.
        Will combine with any config blocks added using python.

        The dictionary that is loaded is modified in place.

        Raises NotImplementedError if a configuration was already loaded or
        if `yamlPath` is a list.
        """
        if self.__loadedYaml or isinstance(yamlPath, list):
            raise NotImplementedError("Mering multiple yaml files is not implemented.")

        def merge(config, algs, path=''):
            """Add to config block-by-block"""
            if not isinstance(config, list):
                config = [config]
            # loop over list of blocks with same block name
            for blocks in config:
                # deal with case where empty dict is config; keep going so
                # that later entries of the list are not dropped
                if blocks == {} and path:
                    self.addBlock(path)
                    continue
                # remove any subBlocks from block config
                subBlocks = {}
                for blockName in algs:
                    if blockName in blocks:
                        subBlocks[blockName] = blocks.pop(blockName)
                # anything left should be a block and its configuration
                if blocks:
                    self.addBlock(path, **blocks)
                # add in any subBlocks
                for subName, subBlock in subBlocks.items():
                    newPath = f'{path}.{subName}' if path else subName
                    merge(subBlock, algs[subName].subAlgs, newPath)
            return

        logCPAlgTextCfg.debug(f'loading {yamlPath}')
        if configDict is not None:
            # if configDict is provided, use it directly
            config = configDict
        else:
            config = readYaml(yamlPath)
        # only mark as loaded once the input was actually obtained, so that
        # a failed read does not block a later attempt
        self.__loadedYaml = True

        # check if blocks are defined in yaml file
        if "AddConfigBlocks" in config:
            self._configureAlg(self._algs["AddConfigBlocks"], config["AddConfigBlocks"])

        # Preprocess the configuration dictionary (see !76767)
        self.preprocessConfig(config, self._algs)

        merge(config, self._algs)

        # Cleanup placeholders (see !76767)
        self.cleanupPlaceholders(config)

        return

    def printConfig(self, sort=False, jsonFormat=False):
        """
        Print the current configuration as YAML.

        Raises ValueError if the configuration is None.
        """
        if self._config is None:
            raise ValueError("No configuration has been loaded.")
        printYaml(self._config, sort, jsonFormat)
        return

    def saveYaml(self, filePath='config.yaml', default_flow_style=False,
                 **kwargs):
        """
        Convert dictionary representation to yaml and save to `filePath`.

        `default_flow_style` and any extra keyword arguments are forwarded
        to `yaml.dump`.
        """
        logCPAlgTextCfg.info(f"Saving configuration to {filePath}")
        config = self._config
        with open(filePath, 'w') as outfile:
            # honour the caller's flow style instead of a hard-coded False
            yaml.dump(config, outfile, default_flow_style=default_flow_style, **kwargs)
        return

    def addBlock(self, name, **kwargs):
        """
        Create entry into dictionary representing the text configuration.

        `name` is a dot-separated path ('Parent.Child'); every parent in
        the path must already exist. If an entry with the same name is
        already present, the entries are collected in a list.
        """
        def setEntry(name, config, opts):
            if '.' not in name:
                if name not in config:
                    config[name] = opts
                elif isinstance(config[name], list):
                    config[name].append(opts)
                else:
                    config[name] = [config[name], opts]
                # set last added block for setOptionValue
                self._last = opts
            else:
                name, _, rest = name.partition('.')
                config = config[name]
                # with several parents of the same name, use the latest one
                if isinstance(config, list):
                    config = config[-1]
                setEntry(rest, config, opts)
            return
        setEntry(name, self._config, dict(kwargs))
        return

    def setOptions(self, **kwargs):
        """
        Set option(s) for the last block that was added. If an option
        was added previously, will update value.

        Raises TypeError if no block has been added yet.
        """
        if self._last is None:
            raise TypeError("Cannot set options before adding a block")
        # points to dict with opts for last added block
        self._last.update(**kwargs)

    def configure(self):
        """
        Process the configuration and configure the added algorithms.

        Returns the resulting ConfigSequence. Raises ValueError if the
        configuration contains a root block that is not registered.
        """
        # make sure all blocks in yaml file are added (otherwise they would be ignored)
        for blockName in self._config:
            if blockName not in self._order[self.ROOTNAME]:
                # loadConfig stores unrecognized root-level keys under the
                # empty name; report the first of those keys instead
                if not blockName:
                    blockName = list(self._config[blockName].keys())[0]
                raise ValueError(f"Unkown block {blockName} in yaml file")

        # configure blocks
        configSeq = ConfigSequence()
        for blockName in self._order[self.ROOTNAME]:
            if blockName == "AddConfigBlocks":
                continue

            assert blockName in self._algs

            # order only applies to root blocks
            if blockName not in self._config:
                continue
            blockConfig = self._config[blockName]
            alg = self._algs[blockName]
            self._configureAlg(alg, blockConfig, configSeq)
        return configSeq

    def _addNewConfigBlocks(self, modulePath, functionName,
                            algName, defaults=None, pos=None, superBlocks=None):
        """
        Load <functionName> from <modulePath> and register it as the
        block <algName>.

        Raises ModuleNotFoundError if the module cannot be imported.
        """
        try:
            module = importlib.import_module(modulePath)
            fxn = getattr(module, functionName)
        except ModuleNotFoundError as e:
            raise ModuleNotFoundError(f"{e}\nFailed to load {functionName} from {modulePath}") from e
        else:
            # NOTE(review): this stores a function (not a module) in
            # sys.modules under the function name - confirm this is needed
            sys.modules[functionName] = fxn
        # add new algorithm to available algorithms
        self.addAlgConfigBlock(algName=algName, alg=fxn,
                               defaults=defaults,
                               superBlocks=superBlocks,
                               pos=pos)
        return

    def _configureAlg(self, block, blockConfig, configSeq=None, containerName=None,
                      extraOptions=None):
        """
        Configure `block` with `blockConfig` (one options dictionary or a
        list of them), append the result to `configSeq` if given, and
        recurse into the sub-blocks present in the options.

        `containerName` and `extraOptions` are the values inherited from the
        parent block. Returns `configSeq`. Raises ValueError if an options
        dictionary contains keys that are not used.
        """
        # 'AddConfigBlocks' blocks can be passed as either a list or a dictionary.
        # Dictionaries are allowed so that when merging YAML files duplicate entries get automatically removed.
        # This turns the dictionary into a list for downstream use.
        if block.algName == "AddConfigBlocks" and isinstance(blockConfig, dict):
            blockConfig = [options | {'algName': algName} for algName, options in blockConfig.items()]

        elif not isinstance(blockConfig, list):
            blockConfig = [blockConfig]

        # NOTE(review): containerName and extraOptions are not reset between
        # entries of the list, so values picked up from one entry are also
        # applied to the following ones - confirm this is intended
        for options in blockConfig:
            # Special case: propagate containerName down to subAlgs
            if 'containerName' in options:
                containerName = options['containerName']
            elif containerName is not None:
                options['containerName'] = containerName
            # will check which options are associated alg and not options
            logCPAlgTextCfg.debug(f"Configuring {block.algName}")
            seq, funcOpts = block.makeConfig(options)
            if not seq._blocks:
                continue
            algOpts = seq.setOptions(options)
            # If containerName was not set explicitly, we can now retrieve
            # its default value
            if containerName is None:
                for opt in algOpts:
                    if 'name' in opt and opt['name'] == 'containerName':
                        containerName = opt.get('value', None)
                        break  # Exit the loop as we've found the key

            if configSeq is not None:
                configSeq += seq

            # propagate special extra options to subalgs
            if extraOptions is None:
                extraOptionsList = ["skipOnData", "skipOnMC", "onlyForDSIDs"]
                for i in algOpts:
                    if i['name'] in extraOptionsList and i['defaultValue'] != i['value']:
                        if extraOptions is None:
                            extraOptions = {}
                        extraOptions[i['name']] = i['value']
            else:
                algOpts = seq.setOptions(extraOptions.copy())

            # check to see if there are unused parameters
            algOpts = [i['name'] for i in algOpts]
            expectedOptions = set(funcOpts)
            expectedOptions |= set(algOpts)
            expectedOptions |= set(block.subAlgs)

            difference = set(options.keys()) - expectedOptions
            # marker added by preprocessConfig, not a real option
            difference.discard('__placeholder__')
            if difference:
                difference = "\n".join(difference)
                raise ValueError(f"There are options set that are not used for "
                                 f"{block.algName}:\n{difference}\n"
                                 "Please check your configuration.")

            # check for sub-blocks and call this function recursively
            for alg in self._order.get(block.algName, []):
                if alg in options:
                    subAlg = block.subAlgs[alg]
                    self._configureAlg(subAlg, options[alg], configSeq, containerName, extraOptions)
        return configSeq
326
327
def makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False):
    """
    Read the text configuration at `configPath`, configure the blocks it
    lists and run the full configuration through a ConfigAccumulator.

    `autoconfigFromFlags` is the deprecated spelling of `flags`; passing
    both raises ValueError. `noPhysliteBroken` is accepted but not used
    in this function.

    Returns the `CA` attribute of the accumulator when `isAthena` is
    truthy, otherwise None.
    """

    # Historically we have used the identifier
    # `autoconfigFromFlags`, but in the rest of the code base
    # `flags` is used. So for now we allow either, and can hopefully
    # at some point remove the former (21 Aug 25).
    if autoconfigFromFlags is None:
        if flags is None:
            warnings.warn('it is deprecated to configure meta-data for analysis configuration manually, please read the configuration flags via the meta-data reader', category=deprecationWarningCategory, stacklevel=2)
    else:
        if flags is not None:
            raise ValueError("Cannot pass both flags and autoconfigFromFlags arguments")
        flags = autoconfigFromFlags
        warnings.warn('Using autoconfigFromFlags parameter is deprecated, use flags instead', category=deprecationWarningCategory, stacklevel=2)

    from AnalysisAlgorithmsConfig.ConfigAccumulator import ConfigAccumulator

    textConfig = TextConfig(configPath)

    logCPAlgTextCfg.info("Configuration file read in:")
    textConfig.printConfig()

    logCPAlgTextCfg.info("Default algorithms:")
    textConfig.printAlgs(printOpts=True)

    logCPAlgTextCfg.info("Configuring algorithms based on YAML file:")
    sequence = textConfig.configure()

    # defaults are added to config as algs are configured
    logCPAlgTextCfg.info("Configuration used:")
    textConfig.printConfig()

    # compile
    accumulatorArgs = dict(
        algSeq=algSeq,
        dataType=dataType,
        isPhyslite=isPhyslite,
        geometry=geometry,
        autoconfigFromFlags=autoconfigFromFlags,
        flags=flags,
        noSystematics=noSystematics,
    )
    accumulator = ConfigAccumulator(**accumulatorArgs)
    sequence.fullConfigure(accumulator)

    # blocks can be reordered during configSeq.fullConfigure
    logCPAlgTextCfg.info("ConfigBlocks and their configuration:")
    sequence.printOptions()

    if isAthena:
        return accumulator.CA
    return None
370
371
372# Combine configuration files
373#
374# See the README for more info on how this works
375#
def combineConfigFiles(local, config_path, fragment_key="include"):
    """
    Recursively combine configuration fragments into `local`.

    - Looks for `fragment_key` at any dict node.
    - If value is a string/path: merge that fragment (a TextConfigWarning
      is issued, a list is the expected form).
    - If value is a list: merge all fragments in order.
      For conflicts between fragments, the **earlier** file in the list wins.
      Local keys still override the merged fragments.
    - Fragments that load as None (e.g. an empty file) are skipped.

    `local` is modified in place; `config_path` is the directory relative
    to which fragments are looked up (see `_find_fragment`).

    Returns True if any merging happened at or below this node.
    Raises TypeError if the value of `fragment_key` is neither a path nor
    a list.
    """
    combined = False

    # If this isn't an iterable there's nothing to combine
    if isinstance(local, dict):
        to_combine = local.values()
    elif isinstance(local, list):
        to_combine = local
    else:
        return combined

    # Recurse first so that nested nodes are resolved
    for sub in to_combine:
        combined = combineConfigFiles(sub, config_path, fragment_key=fragment_key) or combined

    # Only dict nodes can have include keys; check the type before the
    # membership test so that list *elements* are never mistaken for keys
    if not isinstance(local, dict) or fragment_key not in local:
        return combined

    # Normalize to a list of paths
    value = local[fragment_key]
    if isinstance(value, (str, pathlib.Path)):
        warnings.warn(
            f"{fragment_key} should be followed with a list of files",
            TextConfigWarning,
            stacklevel=2,
        )
        paths = [value]
    elif isinstance(value, list):
        paths = value
    else:
        raise TypeError(f"'{fragment_key}' must be a string path or a list of paths, got {type(value).__name__}")

    # Build an accumulator of all fragments, earlier paths win on conflicts
    fragments_acc = {}
    for entry in paths:
        fragment_path = _find_fragment(pathlib.Path(entry), config_path)
        fragment = _load_fragment(fragment_path)

        # An empty fragment file loads as None: nothing to merge
        if fragment is None:
            continue

        # Allow recursion inside each fragment, using the fragment's directory as base
        combineConfigFiles(fragment, fragment_path.parent, fragment_key=fragment_key)

        # Merge this fragment into the accumulator; earlier entries win
        _merge_dicts(fragments_acc, fragment)

    # Remove the key before merging to avoid re-processing it
    del local[fragment_key]

    # Merge fragments into local; local values take precedence
    _merge_dicts(local, fragments_acc)

    return True
443
444
def _load_fragment(fragment_path: pathlib.Path):
    """Load a YAML or JSON fragment

    Files with a '.json' suffix (any letter case) are read with the json
    module, everything else with `yaml.safe_load`.

    This function is superfluous as of the yaml 1.2 spec (which
    has not been implemented in ATLAS Yaml dependencies).
    Once https://github.com/yaml/pyyaml/issues/173 is resolved
    pyyaml will support yaml 1.2, which is compatible with json.
    Until then yaml and json behave differently in some scientific
    notation edge cases.
    """

    with open(fragment_path, 'r') as handle:
        parse = json.load if fragment_path.suffix.lower() == '.json' else yaml.safe_load
        return parse(handle)
461
def _find_fragment(fragment_path, config_path):
    """Return the first existing location of `fragment_path`.

    Candidates are tried in this order: `fragment_path` as given,
    `config_path / fragment_path`, then `fragment_path` below each entry
    of the colon-separated DATAPATH environment variable. An unset
    DATAPATH, and empty entries in it, contribute no candidates.

    Raises FileNotFoundError if no candidate exists.
    """
    candidates = [fragment_path, config_path / fragment_path]
    # DATAPATH may be undefined (e.g. outside a release setup): that must
    # not prevent finding fragments through the first two candidates
    data_path = os.environ.get("DATAPATH", "")
    candidates.extend(entry / fragment_path for entry in data_path.split(":") if entry)

    for path in candidates:
        if path.exists():
            return path

    raise FileNotFoundError(fragment_path)
473
474
def _merge_dicts(local, fragment):
    """Merge `fragment` into `local` in place, `local` taking precedence.

    Lists are extended with the fragment's items. For dictionaries, keys
    missing from `local` are copied over and keys present in both are
    merged recursively. Any other type of `local` is left as it is.
    """
    # In the list case append the fragment to the local list
    if isinstance(local, list):
        local.extend(fragment)
        return
    if not isinstance(local, dict):
        return
    # In the dict case only missing values are added to local
    for name, incoming in fragment.items():
        if name not in local:
            local[name] = incoming
        else:
            _merge_dicts(local[name], incoming)
    return
void print(char *figname, TCanvas *c1)
_configureAlg(self, block, blockConfig, configSeq=None, containerName=None, extraOptions=None)
__init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True)
Definition ConfigText.py:40
saveYaml(self, filePath='config.yaml', default_flow_style=False, **kwargs)
_addNewConfigBlocks(self, modulePath, functionName, algName, defaults=None, pos=None, superBlocks=None)
cleanupPlaceholders(self, config)
Definition ConfigText.py:94
loadConfig(self, yamlPath=None, *, configDict=None)
printConfig(self, sort=False, jsonFormat=False)
addBlock(self, name, **kwargs)
preprocessConfig(self, config, algs)
Definition ConfigText.py:70
STL class.
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition hcg.cxx:130
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
Definition merge.py:1
makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False)
_merge_dicts(local, fragment)
_load_fragment(pathlib.Path fragment_path)
printYaml(d, sort=False, jsonFormat=False)
Definition ConfigText.py:31
readYaml(yamlPath)
Definition ConfigText.py:22
combineConfigFiles(local, config_path, fragment_key="include")
_find_fragment(fragment_path, config_path)