ATLAS Offline Software
Loading...
Searching...
No Matches
ConfigText.py
Go to the documentation of this file.
1# Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
2#
3# @author Joseph Lambert
4
5import yaml
6import json
7import os
8import sys
9import importlib
10import pathlib
11import warnings
12
13from AnalysisAlgorithmsConfig.ConfigSequence import ConfigSequence
14from AnalysisAlgorithmsConfig.ConfigFactory import ConfigFactory
15from AnalysisAlgorithmsConfig.ConfigAccumulator import deprecationWarningCategory
16
17from AnaAlgorithm.DualUseConfig import isAthena
18from AnaAlgorithm.Logging import logging
19logCPAlgTextCfg = logging.getLogger('CPAlgTextCfg')
20
21
def readYaml(yamlPath):
    """
    Parse the YAML file at `yamlPath` and return the loaded content.

    Raises ValueError if `yamlPath` does not point to an existing file.
    """
    # guard clause: refuse anything that is not a regular file
    if not os.path.isfile(yamlPath):
        raise ValueError(f"{yamlPath} is not a file.")

    with open(yamlPath, 'r') as stream:
        return yaml.safe_load(stream)
29
30
def printYaml(d, sort=False, jsonFormat=False):
    """
    Print the dictionary `d` to stdout as YAML.

    `sort` is forwarded to yaml.dump as `sort_keys`, `jsonFormat` as
    `default_flow_style`.
    """
    text = yaml.dump(d, default_flow_style=jsonFormat, sort_keys=sort)
    print(text)
34
class TextConfigWarning(FutureWarning):
    """Warning category (a FutureWarning) for text-configuration issues."""
37
38
class TextConfig(ConfigFactory):
    """
    Dictionary-based (YAML-like) front end to the config-block factory.

    The configuration is kept in `self._config` as nested dictionaries
    keyed by block name. It can be filled from a YAML file or a dictionary
    (`loadConfig`) and/or block by block from python (`addBlock`,
    `setOptions`). `configure` turns it into a ConfigSequence.
    """

    def __init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True):
        """
        Set up the factory and optionally load an initial configuration.

        yamlPath         -- path of a YAML file to load (optional)
        config           -- already loaded configuration dictionary (optional)
        addDefaultBlocks -- if true, register the default algorithms

        Raises ValueError if both `yamlPath` and `config` are given.
        """
        super().__init__(addDefaultBlocks=False)

        if yamlPath and config:
            raise ValueError("Cannot specify both yamlPath and config. Use one or the other.")

        # Block to add new blocks to this object
        self.addAlgConfigBlock(algName="AddConfigBlocks", alg=self._addNewConfigBlocks,
                               defaults={'self': self})
        # add default blocks
        if addDefaultBlocks:
            self.addDefaultAlgs()
        # load yaml
        self._config = {}
        # do not allow for loading multiple yaml files
        self.__loadedYaml = False
        if yamlPath is not None or config is not None:
            self.loadConfig(yamlPath, configDict=config)
        # last is used for setOptionValue when using addBlock
        # (reset here, i.e. also after an initial configuration was loaded)
        self._last = None

    def setConfig(self, config):
        """
        Use `config` as the configuration dictionary of this object.

        Raises ValueError if a (non-empty) configuration is already present.
        """
        if self._config:
            raise ValueError("Configuration has already been loaded.")
        self._config = config

    # Less-than-ideal fix introduced in !76767
    def preprocessConfig(self, config, algs):
        """
        Preprocess the configuration dictionary.
        Ensure blocks with only sub-blocks are initialized with an empty dictionary.

        Such blocks get a '__placeholder__' entry so that they are not empty
        when they are merged. `config` is modified in place; `algs` maps block
        names to the known blocks at the current nesting level.
        """
        def processNode(node, algs):
            if not isinstance(node, dict):
                return  # Base case: not a dictionary
            # iterate over a snapshot, entries of node are replaced below
            for blockName, blockContent in list(node.items()):
                # Only block names recognized in algs are handled
                if blockName not in algs:
                    continue
                # If the block defines none of its own options, mark it
                if isinstance(blockContent, dict) and not any(
                    key in algs[blockName].options for key in blockContent
                ):
                    node[blockName] = {'__placeholder__': True, **blockContent}
                # Recurse into sub-blocks
                processNode(node[blockName], algs[blockName].subAlgs)

        # Start processing from the root of the configuration
        processNode(config, algs)

    # Less-than-ideal fix introduced in !76767
    def cleanupPlaceholders(self, config):
        """
        Remove placeholder markers after initialization.

        Walks nested dictionaries of `config` in place; anything that is not
        a dictionary (including lists) is left untouched.
        """
        if not isinstance(config, dict):
            return
        if "__placeholder__" in config:
            del config["__placeholder__"]
        for value in config.values():
            self.cleanupPlaceholders(value)

    def loadConfig(self, yamlPath=None, *, configDict=None):
        """
        read a YAML file. Will combine with any config blocks added using python

        yamlPath   -- path of the YAML file (used if `configDict` is None)
        configDict -- configuration dictionary to use instead of a file; it is
                      not modified, a copy of its dict/list structure is used

        Raises NotImplementedError if a configuration was already loaded or
        if `yamlPath` is a list.
        """
        if self.__loadedYaml or isinstance(yamlPath, list):
            raise NotImplementedError("Mering multiple yaml files is not implemented.")
        self.__loadedYaml = True

        def detach(node):
            """Copy nested dicts/lists; leaf values are shared, not copied"""
            if isinstance(node, dict):
                return {key: detach(value) for key, value in node.items()}
            if isinstance(node, list):
                return [detach(value) for value in node]
            return node

        def merge(config, algs, path=''):
            """Add to config block-by-block"""
            if not isinstance(config, list):
                config = [config]
            # loop over list of blocks with same block name
            for blocks in config:
                # deal with case where empty dict is config; keep going so
                # that later entries of the same list are not dropped
                if blocks == {} and path:
                    self.addBlock(path)
                    continue
                # remove any subBlocks from block config
                subBlocks = {}
                for blockName in algs:
                    if blockName in blocks:
                        subBlocks[blockName] = blocks.pop(blockName)
                # anything left should be a block and it's configuration
                if blocks:
                    self.addBlock(path, **blocks)
                # add in any subBlocks
                for subName, subBlock in subBlocks.items():
                    newPath = f'{path}.{subName}' if path else subName
                    merge(subBlock, algs[subName].subAlgs, newPath)

        logCPAlgTextCfg.debug(f'loading {yamlPath}')
        if configDict is not None:
            # merging pops entries and adds placeholders: work on a private
            # copy so that the dictionary of the caller stays intact
            config = detach(configDict)
        else:
            config = readYaml(yamlPath)
        # check if blocks are defined in yaml file
        if "AddConfigBlocks" in config:
            self._configureAlg(self._algs["AddConfigBlocks"], config["AddConfigBlocks"])

        # Preprocess the configuration dictionary (see !76767)
        self.preprocessConfig(config, self._algs)

        merge(config, self._algs)

        # Cleanup placeholders (see !76767)
        self.cleanupPlaceholders(config)

    def printConfig(self, sort=False, jsonFormat=False):
        """Print YAML configuration file."""
        if self._config is None:
            raise ValueError("No configuration has been loaded.")
        printYaml(self._config, sort, jsonFormat)

    def saveYaml(self, filePath='config.yaml', default_flow_style=False,
                 **kwargs):
        """
        Convert dictionary representation to yaml and save

        filePath           -- output file, overwritten if it exists
        default_flow_style -- forwarded to yaml.dump
        kwargs             -- any further keyword arguments for yaml.dump
        """
        logCPAlgTextCfg.info(f"Saving configuration to {filePath}")
        with open(filePath, 'w') as outfile:
            # honour the argument of the caller instead of hard-coding False
            yaml.dump(self._config, outfile,
                      default_flow_style=default_flow_style, **kwargs)

    def addBlock(self, name, **kwargs):
        """
        Create entry into dictionary representing the text configuration

        name   -- block name; sub-blocks are addressed with a dotted path
                  ('parent.child'), the parent has to exist already
        kwargs -- options of the new block

        If a block of that name exists already, the entries are collected
        in a list. The new block becomes the target of `setOptions`.
        """
        def setEntry(path, config, opts):
            head, dot, rest = path.partition('.')
            if not dot:
                # leaf: store the options under this name
                if path not in config:
                    config[path] = opts
                elif isinstance(config[path], list):
                    config[path].append(opts)
                else:
                    config[path] = [config[path], opts]
                # set last added block for setOptionValue
                self._last = opts
                return
            # descend into the parent; for repeated blocks use the latest one
            parent = config[head]
            if isinstance(parent, list):
                parent = parent[-1]
            setEntry(rest, parent, opts)

        setEntry(name, self._config, dict(kwargs))

    def setOptions(self, **kwargs):
        """
        Set option(s) for the last block that was added. If an option
        was added previously, will update value

        Raises TypeError if no block has been added yet.
        """
        if self._last is None:
            raise TypeError("Cannot set options before adding a block")
        # points to dict with opts for last added block
        self._last.update(**kwargs)

    def configure(self):
        """
        Process YAML configuration file and configure added algorithms.

        Returns the ConfigSequence holding the configured blocks. Raises
        ValueError if the configuration contains an unknown root block.
        """
        # make sure all blocks in yaml file are added (otherwise they would be ignored)
        for blockName in self._config:
            if blockName not in self._order[self.ROOTNAME]:
                # options without a known root block were stored under the
                # empty name: report the first of them instead
                if not blockName:
                    blockName = list(self._config[blockName].keys())[0]
                raise ValueError(f"Unkown block {blockName} in yaml file")

        # configure blocks
        configSeq = ConfigSequence()
        for blockName in self._order[self.ROOTNAME]:
            if blockName == "AddConfigBlocks":
                continue

            assert blockName in self._algs

            # order only applies to root blocks
            if blockName not in self._config:
                continue
            self._configureAlg(self._algs[blockName], self._config[blockName], configSeq)
        return configSeq

    def _addNewConfigBlocks(self, modulePath, functionName,
                            algName, defaults=None, pos=None, superBlocks=None):
        """
        Load <functionName> from <modulePath>

        and register it as block <algName>. `defaults`, `pos` and
        `superBlocks` are forwarded to addAlgConfigBlock.

        Raises ModuleNotFoundError if the module cannot be imported.
        """
        try:
            module = importlib.import_module(modulePath)
            fxn = getattr(module, functionName)
        except ModuleNotFoundError as e:
            # keep the original exception as cause for the traceback
            raise ModuleNotFoundError(f"{e}\nFailed to load {functionName} from {modulePath}") from e
        else:
            sys.modules[functionName] = fxn
        # add new algorithm to available algorithms
        self.addAlgConfigBlock(algName=algName, alg=fxn,
                               defaults=defaults,
                               superBlocks=superBlocks,
                               pos=pos)

    def _configureAlg(self, block, blockConfig, configSeq=None, containerName=None,
                      extraOptions=None):
        """
        Configure `block` (and recursively its sub-blocks) from `blockConfig`.

        block         -- block description (provides algName, makeConfig, subAlgs)
        blockConfig   -- options dictionary, or list of such dictionaries
        configSeq     -- sequence the created blocks are appended to (optional)
        containerName -- container name inherited from the parent block
        extraOptions  -- special options inherited from the parent block

        Returns `configSeq`. Raises ValueError if options are left over that
        are neither options of the block nor names of its sub-blocks.
        """
        # 'AddConfigBlocks' blocks can be passed as either a list or a dictionary.
        # Dictionaries are allowed so that when merging YAML files duplicate entries get automatically removed.
        # This turns the dictionary into a list for downstream use.
        if block.algName == "AddConfigBlocks" and isinstance(blockConfig, dict):
            blockConfig = [options | {'algName': algName} for algName, options in blockConfig.items()]

        elif not isinstance(blockConfig, list):
            blockConfig = [blockConfig]

        for options in blockConfig:
            # Special case: propagate containerName down to subAlgs
            # NOTE(review): containerName also carries over to later entries
            # of the same list - confirm that this is intended
            if 'containerName' in options:
                containerName = options['containerName']
            elif containerName is not None and 'containerName' not in options:
                options['containerName'] = containerName
            # will check which options are associated alg and not options
            logCPAlgTextCfg.debug(f"Configuring {block.algName}")
            seq, funcOpts = block.makeConfig(options)
            if not seq._blocks:
                continue
            algOpts = seq.setOptions(options)
            # If containerName was not set explicitly, we can now retrieve
            # its default value
            if containerName is None:
                for opt in algOpts:
                    if 'name' in opt and opt['name'] == 'containerName':
                        containerName = opt.get('value', None)
                        break  # Exit the loop as we've found the key

            if configSeq is not None:
                configSeq += seq

            # propagate special extra options to subalgs
            # NOTE(review): once set, extraOptions also applies to later
            # entries of the same list - confirm that this is intended
            if extraOptions is None:
                extraOptionsList = ["skipOnData", "skipOnMC", "onlyForDSIDs"]
                for i in algOpts:
                    if i['name'] in extraOptionsList and i['defaultValue'] != i['value']:
                        if extraOptions is None:
                            extraOptions = {}
                        extraOptions[i['name']] = i['value']
                    # TODO: figure out why onlyForDSIDs is not properly updated in algOpts
                    if i['name'] == "onlyForDSIDs" and "onlyForDSIDs" in options and options["onlyForDSIDs"]:
                        if extraOptions is None:
                            extraOptions = {}
                        extraOptions[i['name']] = options["onlyForDSIDs"]
            else:
                algOpts = seq.setOptions(extraOptions.copy())

            # check to see if there are unused parameters
            algOpts = [i['name'] for i in algOpts]
            expectedOptions = set(funcOpts)
            expectedOptions |= set(algOpts)
            expectedOptions |= set(block.subAlgs)

            difference = set(options.keys()) - expectedOptions
            # marker added by preprocessConfig, not a user option
            difference.discard('__placeholder__')
            if difference:
                difference = "\n".join(difference)
                raise ValueError(f"There are options set that are not used for "
                                 f"{block.algName}:\n{difference}\n"
                                 "Please check your configuration.")

            # check for sub-blocks and call this function recursively
            for alg in self._order.get(block.algName, []):
                if alg in options:
                    subAlg = block.subAlgs[alg]
                    self._configureAlg(subAlg, options[alg], configSeq, containerName, extraOptions)
        return configSeq
331
332
def makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False):
    """
    Build and fully configure the sequence described by a YAML file.

    configPath          -- path of the YAML file, handed to TextConfig
    flags               -- configuration flags
    autoconfigFromFlags -- deprecated alias of `flags`; passing both raises
                           ValueError
    algSeq, noSystematics, dataType, geometry, isPhyslite
                        -- forwarded to ConfigAccumulator
    noPhysliteBroken    -- not used in this function

    A deprecation warning is issued if `autoconfigFromFlags` is used, or if
    neither it nor `flags` is given.

    Returns the `CA` member of the ConfigAccumulator if `isAthena` is true,
    None otherwise.
    """

    # Historically we have used the identifier
    # `autoconfigFromFlags`, but in the rest of the code base
    # `flags` is used. So for now we allow either, and can hopefully
    # at some point remove the former (21 Aug 25).
    if autoconfigFromFlags is not None:
        if flags is not None:
            raise ValueError("Cannot pass both flags and autoconfigFromFlags arguments")
        flags = autoconfigFromFlags
        warnings.warn(
            'Using autoconfigFromFlags parameter is deprecated, use flags instead',
            category=deprecationWarningCategory,
            stacklevel=2,
        )
    elif flags is None:
        warnings.warn(
            'it is deprecated to configure meta-data for analysis configuration manually, please read the configuration flags via the meta-data reader',
            category=deprecationWarningCategory,
            stacklevel=2,
        )

    from AnalysisAlgorithmsConfig.ConfigAccumulator import ConfigAccumulator

    textConfig = TextConfig(configPath)

    logCPAlgTextCfg.info("Configuration file read in:")
    textConfig.printConfig()

    logCPAlgTextCfg.info("Default algorithms:")
    textConfig.printAlgs(printOpts=True)

    logCPAlgTextCfg.info("Configuring algorithms based on YAML file:")
    sequence = textConfig.configure()

    # defaults are added to config as algs are configured
    logCPAlgTextCfg.info("Configuration used:")
    textConfig.printConfig()

    # compile
    accumulator = ConfigAccumulator(
        algSeq=algSeq,
        dataType=dataType,
        isPhyslite=isPhyslite,
        geometry=geometry,
        autoconfigFromFlags=autoconfigFromFlags,
        flags=flags,
        noSystematics=noSystematics,
    )
    sequence.fullConfigure(accumulator)

    # blocks can be reordered during configSeq.fullConfigure
    logCPAlgTextCfg.info("ConfigBlocks and their configuration:")
    sequence.printOptions()

    if isAthena:
        return accumulator.CA
    return None
375
376
377# Combine configuration files
378#
379# See the README for more info on how this works
380#
def combineConfigFiles(local, config_path, fragment_key="include"):
    """
    Resolve `fragment_key` entries in `local` by merging the named files.

    Nested dicts and lists are handled first, then this node:

    - a string/path value names a single fragment (this form triggers a
      TextConfigWarning, a list is expected),
    - a list value names several fragments, merged in order; where they
      conflict the **earlier** file wins,
    - keys already present in `local` override anything from fragments.

    Fragments may contain `fragment_key` themselves; those are resolved
    with the directory of the fragment as base. `local` is modified in
    place and the `fragment_key` entry is removed.

    Returns True if something was merged at or below this node. Raises
    TypeError if the value of `fragment_key` is neither string/path nor list.
    """
    # Only containers can hold anything to combine
    if isinstance(local, dict):
        children = local.values()
    elif isinstance(local, list):
        children = local
    else:
        return False

    # Resolve nested nodes before looking at this one
    merged_below = False
    for child in children:
        if combineConfigFiles(child, config_path, fragment_key=fragment_key):
            merged_below = True

    # Nothing to include here, or not a dict node: done
    if fragment_key not in local or not isinstance(local, dict):
        return merged_below

    # Bring the include value into list form
    include_value = local[fragment_key]
    if isinstance(include_value, list):
        fragment_paths = include_value
    elif isinstance(include_value, (str, pathlib.Path)):
        warnings.warn(
            f"{fragment_key} should be followed with a list of files",
            TextConfigWarning,
            stacklevel=2,
        )
        fragment_paths = [include_value]
    else:
        raise TypeError(f"'{fragment_key}' must be a string path or a list of paths, got {type(include_value).__name__}")

    # Collect all fragments; content of earlier files takes precedence
    collected = {}
    for entry in fragment_paths:
        located = _find_fragment(pathlib.Path(entry), config_path)
        content = _load_fragment(located)

        # includes inside the fragment are relative to the fragment itself
        combineConfigFiles(content, located.parent, fragment_key=fragment_key)

        _merge_dicts(collected, content)

    # Drop the include entry so it is not processed again
    local.pop(fragment_key)

    # Local values take precedence over the collected fragments
    _merge_dicts(local, collected)

    return True
448
449
def _load_fragment(fragment_path: pathlib.Path):
    """Read a configuration fragment from a JSON or YAML file

    Files with a `.json` suffix (any case) are parsed as JSON, all
    others as YAML.

    The distinction becomes unnecessary with the yaml 1.2 spec, which
    is compatible with json, but the ATLAS Yaml dependencies do not
    implement it: pyyaml will only support yaml 1.2 once
    https://github.com/yaml/pyyaml/issues/173 is resolved. Until then
    the two formats differ in some scientific notation edge cases.
    """

    with open(fragment_path, 'r') as stream:
        # pick the parser from the file suffix
        parse = json.load if fragment_path.suffix.lower() == '.json' else yaml.safe_load
        return parse(stream)
466
def _find_fragment(fragment_path, config_path):
    """
    Locate a configuration fragment and return the first existing path.

    The candidates are tried in this order:
    1. `fragment_path` as given,
    2. `fragment_path` relative to `config_path`,
    3. `fragment_path` relative to each entry of the colon-separated
       environment variable DATAPATH (skipped if DATAPATH is not set).

    Raises FileNotFoundError if none of the candidates exists.
    """
    # DATAPATH may be unset (e.g. outside of a release setup); this must not
    # prevent finding fragments next to the including file. Empty entries
    # would only repeat the first candidate, so they are dropped.
    data_dirs = [entry for entry in os.environ.get("DATAPATH", "").split(":") if entry]

    candidates = [
        fragment_path,
        config_path / fragment_path,
        *[pathlib.Path(entry) / fragment_path for entry in data_dirs],
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate

    raise FileNotFoundError(fragment_path)
478
479
def _merge_dicts(local, fragment):
    """
    Merge `fragment` into `local` in place; `local` takes precedence.

    - two lists: the fragment entries are appended to the local list,
    - two dicts: keys missing in `local` are added, keys present in both
      are merged recursively,
    - anything else (scalars, or containers of different kind): `local`
      is kept unchanged.
    """
    # In the list case append the fragment to the local list
    if isinstance(local, list):
        # only extend with a real list: `+=` with a string would add single
        # characters, with a dict its keys
        if isinstance(fragment, list):
            local += fragment
        return
    # In the dict case, append only missing values to local: the local
    # values take precedence over the fragment ones.
    if isinstance(local, dict):
        # a non-dict fragment cannot be merged key by key: local wins
        if not isinstance(fragment, dict):
            return
        for key, value in fragment.items():
            if key in local:
                _merge_dicts(local[key], value)
            else:
                local[key] = value
void print(char *figname, TCanvas *c1)
_configureAlg(self, block, blockConfig, configSeq=None, containerName=None, extraOptions=None)
__init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True)
Definition ConfigText.py:40
saveYaml(self, filePath='config.yaml', default_flow_style=False, **kwargs)
_addNewConfigBlocks(self, modulePath, functionName, algName, defaults=None, pos=None, superBlocks=None)
cleanupPlaceholders(self, config)
Definition ConfigText.py:94
loadConfig(self, yamlPath=None, *, configDict=None)
printConfig(self, sort=False, jsonFormat=False)
addBlock(self, name, **kwargs)
preprocessConfig(self, config, algs)
Definition ConfigText.py:70
STL class.
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition hcg.cxx:130
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
Definition merge.py:1
makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False)
_merge_dicts(local, fragment)
_load_fragment(pathlib.Path fragment_path)
printYaml(d, sort=False, jsonFormat=False)
Definition ConfigText.py:31
readYaml(yamlPath)
Definition ConfigText.py:22
combineConfigFiles(local, config_path, fragment_key="include")
_find_fragment(fragment_path, config_path)