ATLAS Offline Software
Loading...
Searching...
No Matches
ConfigText.py
Go to the documentation of this file.
1# Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
2#
3# @author Joseph Lambert
4
5import yaml
6import json
7import os
8import sys
9import importlib
10import warnings
11from pathlib import Path
12
13from AnalysisAlgorithmsConfig.ConfigSequence import ConfigSequence
14from AnalysisAlgorithmsConfig.ConfigFactory import ConfigFactory
15from AnalysisAlgorithmsConfig.ConfigAccumulator import deprecationWarningCategory
16
17from AnaAlgorithm.DualUseConfig import isAthena
18from AnaAlgorithm.Logging import logging
19logCPAlgTextCfg = logging.getLogger('CPAlgTextCfg')
20
21
def readYaml(yamlPath):
    """Parse the YAML file at ``yamlPath`` and return its content.

    Raises:
        ValueError: if ``yamlPath`` does not point to an existing regular file.
    """
    if os.path.isfile(yamlPath):
        with open(yamlPath, 'r') as stream:
            return yaml.safe_load(stream)
    raise ValueError(f"{yamlPath} is not a file.")
29
30
def printYaml(d, sort=False, jsonFormat=False):
    """Dump ``d`` as YAML text and print it to standard output.

    ``sort`` is forwarded to ``yaml.dump`` as ``sort_keys`` and
    ``jsonFormat`` as ``default_flow_style``.
    """
    rendered = yaml.dump(d, default_flow_style=jsonFormat, sort_keys=sort)
    print(rendered)
34
class TextConfigWarning(FutureWarning):
    """Warning category used in this module for deprecated text-configuration usage."""
37
38
class TextConfig(ConfigFactory):
    """Build a ``ConfigSequence`` from a text (YAML file or dictionary) configuration.

    The configuration is kept in ``self._config``, a dictionary keyed by
    block name.  It can be filled from a YAML file or a dictionary with
    ``loadConfig`` and/or block-by-block from python with ``addBlock`` and
    ``setOptions``.  ``configure`` then turns it into a ``ConfigSequence``.
    """

    def __init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True):
        """Set up the factory and optionally load an initial configuration.

        Args:
            yamlPath: path of a YAML file to load (optional).
            config: configuration dictionary to load instead of a file (optional).
            addDefaultBlocks: if true, call ``addDefaultAlgs()`` to register
                the default blocks.

        Raises:
            ValueError: if both ``yamlPath`` and ``config`` are given.
        """
        # The base class is always told not to add the default blocks; they are
        # added below, after the "AddConfigBlocks" block has been registered.
        super().__init__(addDefaultBlocks=False)

        if yamlPath and config:
            raise ValueError("Cannot specify both yamlPath and config. Use one or the other.")

        # Block to add new blocks to this object
        self.addAlgConfigBlock(algName="AddConfigBlocks", alg=self._addNewConfigBlocks,
                               defaults={'self': self})
        # add default blocks
        if addDefaultBlocks:
            self.addDefaultAlgs()
        # dictionary representation of the text configuration
        self._config = {}
        # do not allow for loading multiple yaml files
        self.__loadedYaml = False
        if yamlPath is not None or config is not None:
            self.loadConfig(yamlPath, configDict=config)
        # last block added with addBlock, used by setOptions; it is reset here,
        # i.e. after any initial load
        self._last = None

    def setConfig(self, config):
        """Install ``config`` as the configuration dictionary.

        Raises:
            ValueError: if a non-empty configuration is already present.
        """
        if self._config:
            raise ValueError("Configuration has already been loaded.")
        self._config = config

    # Less-than-ideal fix introduced in !76767
    def preprocessConfig(self, config, algs):
        """
        Preprocess the configuration dictionary (modified in place).

        Every recognised block whose dictionary contains none of the block's
        own options gets a ``'__placeholder__': True`` entry, so that blocks
        with only sub-blocks are still initialized.
        """
        def processNode(node, knownAlgs):
            # Base case: only dictionaries can hold named blocks
            if not isinstance(node, dict):
                return
            # iterate over a snapshot, since entries of node are replaced below
            for blockName, blockContent in list(node.items()):
                if blockName not in knownAlgs:
                    continue
                blockAlg = knownAlgs[blockName]
                # If the block only defines sub-blocks, mark it with a placeholder
                if isinstance(blockContent, dict) and not any(
                    key in blockAlg.options for key in blockContent
                ):
                    node[blockName] = {'__placeholder__': True, **blockContent}
                # Recurse into sub-blocks
                processNode(node[blockName], blockAlg.subAlgs)

        # Start processing from the root of the configuration
        processNode(config, algs)

    # Less-than-ideal fix introduced in !76767
    def cleanupPlaceholders(self, config):
        """
        Remove ``'__placeholder__'`` markers from ``config`` and from all
        dictionaries nested (as dictionary values) inside it.
        """
        if not isinstance(config, dict):
            return
        config.pop("__placeholder__", None)
        for value in config.values():
            self.cleanupPlaceholders(value)

    def loadConfig(self, yamlPath=None, *, configDict=None):
        """
        Read a YAML file (or use ``configDict`` if given). Will combine with
        any config blocks added using python.

        Note that the loaded dictionary is modified in place: placeholders
        are inserted and sub-blocks are popped while merging.

        Raises:
            NotImplementedError: if a configuration was already loaded, or if
                ``yamlPath`` is a list.
        """
        if self.__loadedYaml or isinstance(yamlPath, list):
            raise NotImplementedError("Mering multiple yaml files is not implemented.")
        self.__loadedYaml = True

        def merge(config, algs, path=''):
            """Add to config block-by-block"""
            if not isinstance(config, list):
                config = [config]
            # loop over list of blocks with same block name
            for blocks in config:
                # deal with case where empty dict is config
                if blocks == {} and path:
                    self.addBlock(path)
                    # continue rather than return, so that the remaining
                    # entries of a list of blocks are not silently dropped
                    continue
                # remove any subBlocks from block config
                subBlocks = {
                    blockName: blocks.pop(blockName)
                    for blockName in algs
                    if blockName in blocks
                }
                # anything left should be a block and its configuration
                if blocks:
                    self.addBlock(path, **blocks)
                # add in any subBlocks
                for subName, subBlock in subBlocks.items():
                    newPath = f'{path}.{subName}' if path else subName
                    merge(subBlock, algs[subName].subAlgs, newPath)

        logCPAlgTextCfg.debug(f'loading {yamlPath}')
        # if configDict is provided, use it directly
        config = configDict if configDict is not None else readYaml(yamlPath)

        # check if blocks are defined in yaml file
        if "AddConfigBlocks" in config:
            self._configureAlg(self._algs["AddConfigBlocks"], config["AddConfigBlocks"])

        # Preprocess the configuration dictionary (see !76767)
        self.preprocessConfig(config, self._algs)

        merge(config, self._algs)

        # Cleanup placeholders (see !76767)
        self.cleanupPlaceholders(config)

    def printConfig(self, sort=False, jsonFormat=False):
        """Print the current configuration as YAML.

        Raises:
            ValueError: if ``self._config`` is ``None``.
        """
        if self._config is None:
            raise ValueError("No configuration has been loaded.")
        printYaml(self._config, sort, jsonFormat)

    def saveYaml(self, filePath='config.yaml', default_flow_style=False,
                 **kwargs):
        """
        Convert dictionary representation to yaml and save it to ``filePath``.

        ``default_flow_style`` and any extra keyword arguments are forwarded
        to ``yaml.dump``.
        """
        logCPAlgTextCfg.info(f"Saving configuration to {filePath}")
        with open(filePath, 'w') as outfile:
            # forward the caller's choice (it used to be hard-coded to False)
            yaml.dump(self._config, outfile,
                      default_flow_style=default_flow_style, **kwargs)

    def addBlock(self, name, **kwargs):
        """
        Create entry into dictionary representing the text configuration.

        ``name`` may be a dotted path (``parent.child``); every parent in the
        path must already exist.  If an entry with the same name exists, the
        entries are collected in a list.  The keyword arguments become the
        options of the new block.
        """
        def setEntry(entryName, config, opts):
            parent, dot, rest = entryName.partition('.')
            if not dot:
                if entryName not in config:
                    config[entryName] = opts
                elif isinstance(config[entryName], list):
                    config[entryName].append(opts)
                else:
                    # second block of this name: turn the entry into a list
                    config[entryName] = [config[entryName], opts]
                # set last added block for setOptions
                self._last = opts
                return
            # descend into the parent block; for a list use its latest entry
            config = config[parent]
            if isinstance(config, list):
                config = config[-1]
            setEntry(rest, config, opts)

        setEntry(name, self._config, dict(kwargs))

    def setOptions(self, **kwargs):
        """
        Set option(s) for the last block that was added. If an option
        was added previously, will update value.

        Raises:
            TypeError: if no block has been added yet.
        """
        if self._last is None:
            raise TypeError("Cannot set options before adding a block")
        # points to dict with opts for last added block
        self._last.update(**kwargs)

    def configure(self):
        """Process the configuration and configure the added algorithms.

        Returns:
            the ``ConfigSequence`` filled by the configured blocks.

        Raises:
            ValueError: if the configuration contains a root block that is
                not in the list of known root blocks.
        """
        rootOrder = self._order[self.ROOTNAME]

        # make sure all blocks in yaml file are added (otherwise they would be ignored)
        for blockName in self._config:
            if blockName not in rootOrder:
                if not blockName:
                    # entries stored under the empty name: report their first key
                    blockName = list(self._config[blockName].keys())[0]
                raise ValueError(f"Unkown block {blockName} in yaml file")

        # configure blocks; the order only applies to root blocks
        configSeq = ConfigSequence()
        for blockName in rootOrder:
            if blockName == "AddConfigBlocks":
                continue

            assert blockName in self._algs

            if blockName not in self._config:
                continue
            self._configureAlg(self._algs[blockName], self._config[blockName], configSeq)
        return configSeq

    def _addNewConfigBlocks(self, modulePath, functionName,
                            algName, defaults=None, pos=None, superBlocks=None):
        """
        Load <functionName> from <modulePath> and register it as the
        block <algName>.

        Raises:
            ModuleNotFoundError: if the module cannot be imported.
        """
        try:
            module = importlib.import_module(modulePath)
            fxn = getattr(module, functionName)
        except ModuleNotFoundError as e:
            # chain the original exception so the root cause stays visible
            raise ModuleNotFoundError(f"{e}\nFailed to load {functionName} from {modulePath}") from e
        else:
            sys.modules[functionName] = fxn
        # add new algorithm to available algorithms
        self.addAlgConfigBlock(algName=algName, alg=fxn,
                               defaults=defaults,
                               superBlocks=superBlocks,
                               pos=pos)

    def _configureAlg(self, block, blockConfig, configSeq=None, containerName=None,
                      extraOptions=None):
        """Configure ``block`` and, recursively, its sub-blocks.

        Args:
            block: the block to configure.
            blockConfig: options dictionary, or list of such dictionaries.
            configSeq: sequence the created blocks are added to (optional).
            containerName: container name inherited from the parent block.
            extraOptions: special options inherited from the parent block.

        Returns:
            ``configSeq`` as passed in.

        Raises:
            ValueError: if there are options that nothing uses.
        """
        # 'AddConfigBlocks' blocks can be passed as either a list or a dictionary.
        # Dictionaries are allowed so that when merging YAML files duplicate entries get automatically removed.
        # This turns the dictionary into a list for downstream use.
        if block.algName == "AddConfigBlocks" and isinstance(blockConfig, dict):
            blockConfig = [options | {'algName': algName} for algName, options in blockConfig.items()]
        elif not isinstance(blockConfig, list):
            blockConfig = [blockConfig]

        for options in blockConfig:
            # Special case: propagate containerName down to subAlgs
            if 'containerName' in options:
                containerName = options['containerName']
            elif containerName is not None:
                options['containerName'] = containerName
            # will check which options are associated alg and not options
            logCPAlgTextCfg.debug(f"Configuring {block.algName}")
            seq, funcOpts = block.makeConfig(options)
            if not seq._blocks:
                continue
            algOpts = seq.setOptions(options)
            # If containerName was not set explicitly, we can now retrieve
            # its default value
            if containerName is None:
                for opt in algOpts:
                    if 'name' in opt and opt['name'] == 'containerName':
                        containerName = opt.get('value', None)
                        break  # Exit the loop as we've found the key

            if configSeq is not None:
                configSeq += seq

            # propagate special extra options to subalgs
            extraOptionsForAlg = extraOptions.copy() if extraOptions is not None else None
            if extraOptionsForAlg is None:
                # nothing inherited: collect the special options that were
                # changed from their default value on this block
                extraOptionsList = ["skipOnData", "skipOnMC", "onlyForDSIDs"]
                for i in algOpts:
                    if i['name'] in extraOptionsList and i['defaultValue'] != i['value']:
                        if extraOptionsForAlg is None:
                            extraOptionsForAlg = {}
                        extraOptionsForAlg[i['name']] = i['value']
            else:
                algOpts = seq.setOptions(extraOptionsForAlg.copy())

            # check to see if there are unused parameters
            algOpts = [i['name'] for i in algOpts]
            expectedOptions = set(funcOpts)
            expectedOptions |= set(algOpts)
            expectedOptions |= set(block.subAlgs)

            difference = set(options.keys()) - expectedOptions
            difference.discard('__placeholder__')
            if difference:
                # sorted, so that the message is reproducible
                difference = "\n".join(sorted(difference))
                raise ValueError(f"There are options set that are not used for "
                                 f"{block.algName}:\n{difference}\n"
                                 "Please check your configuration.")

            # check for sub-blocks and call this function recursively
            for alg in self._order.get(block.algName, []):
                if alg in options:
                    subAlg = block.subAlgs[alg]
                    self._configureAlg(subAlg, options[alg], configSeq, containerName, extraOptionsForAlg)
        return configSeq
327
328
def makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False):
    """Read the text configuration at ``configPath`` and fully configure it.

    A ``TextConfig`` is built from ``configPath``, turned into a
    ``ConfigSequence`` and run through ``fullConfigure`` on a
    ``ConfigAccumulator`` created from the remaining arguments.  The
    configuration is printed/logged along the way.

    ``autoconfigFromFlags`` is a deprecated alias of ``flags``.
    ``noPhysliteBroken`` is accepted but not used in this function.

    Returns:
        ``configAccumulator.CA`` if ``isAthena`` is true, otherwise ``None``.

    Raises:
        ValueError: if both ``flags`` and ``autoconfigFromFlags`` are given.
    """

    # Historically we have used the identifier
    # `autoconfigFromFlags`, but in the rest of the code base
    # `flags` is used. So for now we allow either, and can hopefully
    # at some point remove the former (21 Aug 25).
    if autoconfigFromFlags is not None and flags is not None:
        raise ValueError("Cannot pass both flags and autoconfigFromFlags arguments")
    if autoconfigFromFlags is not None:
        flags = autoconfigFromFlags
        warnings.warn ('Using autoconfigFromFlags parameter is deprecated, use flags instead', category=deprecationWarningCategory, stacklevel=2)
    elif flags is None:
        warnings.warn ('it is deprecated to configure meta-data for analysis configuration manually, please read the configuration flags via the meta-data reader', category=deprecationWarningCategory, stacklevel=2)

    from AnalysisAlgorithmsConfig.ConfigAccumulator import ConfigAccumulator

    textConfig = TextConfig(configPath)

    logCPAlgTextCfg.info("Configuration file read in:")
    textConfig.printConfig()

    logCPAlgTextCfg.info("Default algorithms:")
    textConfig.printAlgs(printOpts=True)

    logCPAlgTextCfg.info("Configuring algorithms based on YAML file:")
    sequence = textConfig.configure()

    # defaults are added to config as algs are configured
    logCPAlgTextCfg.info("Configuration used:")
    textConfig.printConfig()

    # compile
    accumulatorOptions = {
        'algSeq': algSeq,
        'dataType': dataType,
        'isPhyslite': isPhyslite,
        'geometry': geometry,
        'autoconfigFromFlags': autoconfigFromFlags,
        'flags': flags,
        'noSystematics': noSystematics,
    }
    configAccumulator = ConfigAccumulator(**accumulatorOptions)
    sequence.fullConfigure(configAccumulator)

    # blocks can be reordered during fullConfigure
    logCPAlgTextCfg.info("ConfigBlocks and their configuration:")
    sequence.printOptions()

    if isAthena:
        return configAccumulator.CA
    return None
371
372
373# Combine configuration files
374#
375# See the README for more info on how this works
376#
def combineConfigFiles(local, config_path, fragment_key="include"):
    """
    Resolve ``fragment_key`` entries in ``local`` by merging in the files they name.

    ``local`` is walked recursively (dict values and list items) and is
    modified in place.  At every dict node that has a ``fragment_key`` entry:

    - a string/path value names a single fragment (deprecated form),
    - a list value names several fragments, merged in order; where
      fragments conflict the **earlier** one in the list wins,
    - keys already present in ``local`` win over anything from fragments.

    Fragments may themselves contain ``fragment_key`` entries; these are
    looked up relative to the fragment's own directory first.

    Args:
        local: the configuration node to process.
        config_path: list of directories to search for fragments (a single
            path is still accepted, but deprecated).
        fragment_key: the dictionary key that marks fragments to include.

    Returns:
        True if any merging happened at or below this node, else False.

    Raises:
        ValueError: if ``config_path`` is not a list, ``Path`` or string.
        TypeError: if the value of ``fragment_key`` is not a string, path or list.
    """
    if isinstance(config_path, list):
        search_dirs = [Path(entry) for entry in config_path]
    elif isinstance(config_path, (Path, str)):
        warnings.warn(
            "Passing a single path to combineConfigFiles is deprecated, please pass a list of paths instead",
            TextConfigWarning,
            stacklevel=2,
        )
        search_dirs = [Path(config_path)]
    else:
        raise ValueError("Please specify the path or a list of paths where configuration is expected to reside")

    # Scalars have no children and cannot hold an include key
    if isinstance(local, dict):
        children = local.values()
    elif isinstance(local, list):
        children = local
    else:
        return False

    # Resolve nested nodes first
    merged_below = False
    for child in children:
        if combineConfigFiles(child, search_dirs, fragment_key=fragment_key):
            merged_below = True

    # Nothing to include at this node, or not a node that can hold includes
    if fragment_key not in local or not isinstance(local, dict):
        return merged_below

    # Normalize the include value to a list of paths
    requested = local[fragment_key]
    if isinstance(requested, list):
        fragment_names = requested
    elif isinstance(requested, (str, Path)):
        warnings.warn(
            f"{fragment_key} should be followed with a list of files",
            TextConfigWarning,
            stacklevel=2,
        )
        fragment_names = [requested]
    else:
        raise TypeError(f"'{fragment_key}' must be a string path or a list of paths, got {type(requested).__name__}")

    # Collect all fragments into one dictionary; earlier fragments win on conflicts
    collected = {}
    for fragment_name in fragment_names:
        located = _find_fragment(Path(fragment_name), search_dirs)
        content = _load_fragment(located)

        # Resolve includes inside the fragment, searching its own directory first
        combineConfigFiles(content, [located.parent, *search_dirs], fragment_key=fragment_key)

        _merge_dicts(collected, content)

    # Drop the include key so that it is not processed again
    del local[fragment_key]

    # Local values take precedence over the collected fragments
    _merge_dicts(local, collected)

    return True
456
457
458def _load_fragment(fragment_path: Path):
459 """Load a YAML or JSON fragment
460
461 This function is superfluous as of the yaml 1.2 spec (which
462 has not been implemented in ATLAS Yaml dependencies).
463 Once https://github.com/yaml/pyyaml/issues/173 is resolved
464 pyyaml will support yaml 1.2, which is compatable with json.
465 Until then yaml and json behave differently in some scientific
466 notation edge cases.
467 """
468
469 with open(fragment_path, 'r') as fragment_file:
470 if fragment_path.suffix.lower() == '.json':
471 return json.load(fragment_file)
472 else:
473 return yaml.safe_load(fragment_file)
474
475def _find_fragment(fragment_path: Path, config_paths: list[Path]):
476 paths_to_check = [
477 fragment_path,
478 *[path / fragment_path for path in config_paths],
479 *[x / fragment_path for x in os.environ["DATAPATH"].split(":")]
480 ]
481 for path in paths_to_check:
482 if path.exists():
483 return path
484
485 raise FileNotFoundError(fragment_path)
486
487
488def _merge_dicts(local, fragment):
489 # In the list case append the fragment to the local list
490 if isinstance(local, list):
491 local += fragment
492 return
493 # In the dict case, append only missing values to local: the local
494 # values take precedence over the fragment ones.
495 if isinstance(local, dict):
496 for key, value in fragment.items():
497 if key in local:
498 _merge_dicts(local[key], value)
499 else:
500 local[key] = value
501 return
void print(char *figname, TCanvas *c1)
_configureAlg(self, block, blockConfig, configSeq=None, containerName=None, extraOptions=None)
__init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True)
Definition ConfigText.py:40
saveYaml(self, filePath='config.yaml', default_flow_style=False, **kwargs)
_addNewConfigBlocks(self, modulePath, functionName, algName, defaults=None, pos=None, superBlocks=None)
cleanupPlaceholders(self, config)
Definition ConfigText.py:94
loadConfig(self, yamlPath=None, *, configDict=None)
printConfig(self, sort=False, jsonFormat=False)
addBlock(self, name, **kwargs)
preprocessConfig(self, config, algs)
Definition ConfigText.py:70
STL class.
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition hcg.cxx:132
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179
Definition merge.py:1
makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False)
_merge_dicts(local, fragment)
_load_fragment(Path fragment_path)
_find_fragment(Path fragment_path, list[Path] config_paths)
printYaml(d, sort=False, jsonFormat=False)
Definition ConfigText.py:31
readYaml(yamlPath)
Definition ConfigText.py:22
combineConfigFiles(local, config_path, fragment_key="include")