ATLAS Offline Software
ConfigText.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 #
3 # @author Joseph Lambert
4 
5 import yaml
6 import json
7 import os
8 import sys
9 import importlib
10 import pathlib
11 import warnings
12 
13 from AnalysisAlgorithmsConfig.ConfigSequence import ConfigSequence
14 from AnalysisAlgorithmsConfig.ConfigFactory import ConfigFactory
15 from AnalysisAlgorithmsConfig.ConfigAccumulator import deprecationWarningCategory
16 
17 from AnaAlgorithm.Logging import logging
18 logCPAlgTextCfg = logging.getLogger('CPAlgTextCfg')
19 
20 
def readYaml(yamlPath):
    """Parse the YAML file at ``yamlPath`` and return the loaded content.

    Raises:
        ValueError: if ``yamlPath`` does not point to an existing file.
    """
    if os.path.isfile(yamlPath):
        with open(yamlPath, 'r') as stream:
            return yaml.safe_load(stream)
    raise ValueError(f"{yamlPath} is not a file.")
28 
29 
def printYaml(d, sort=False, jsonFormat=False):
    """Dump ``d`` as YAML text and print it to standard output.

    ``sort`` is forwarded as ``sort_keys`` and ``jsonFormat`` as
    ``default_flow_style`` to ``yaml.dump``.
    """
    rendered = yaml.dump(d, default_flow_style=jsonFormat, sort_keys=sort)
    print(rendered)
33 
class TextConfigWarning(FutureWarning):
    """Warning category used by this module for discouraged text-config usage."""
36 
37 
class TextConfig(ConfigFactory):
    """Build an analysis configuration from YAML text or a dictionary.

    The configuration is stored as a nested dictionary in ``self._config``,
    keyed by block name. It can be filled from a YAML file or a dictionary
    (``loadConfig``), from python (``addBlock`` / ``setOptions``), or both,
    and is converted into a ``ConfigSequence`` by ``configure``.
    """

    def __init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True):
        """Create the factory and optionally load an initial configuration.

        Args:
            yamlPath: path of a YAML file handed to ``loadConfig``.
            config: configuration dictionary handed to ``loadConfig`` as
                ``configDict`` (alternative to ``yamlPath``).
            addDefaultBlocks: when true, ``self.addDefaultAlgs()`` is called.

        Raises:
            ValueError: if both ``yamlPath`` and ``config`` are given.
        """
        super().__init__(addDefaultBlocks=False)

        if yamlPath and config:
            raise ValueError("Cannot specify both yamlPath and config. Use one or the other.")

        # Block to add new blocks to this object
        self.addAlgConfigBlock(algName="AddConfigBlocks", alg=self._addNewConfigBlocks,
                               defaults={'self': self})
        # add default blocks
        if addDefaultBlocks:
            self.addDefaultAlgs()
        # dictionary representation of the text configuration
        self._config = {}
        # do not allow for loading multiple yaml files
        self.__loadedYaml = False
        if yamlPath is not None or config is not None:
            self.loadConfig(yamlPath, configDict=config)
        # last is used for setOptionValue when using addBlock; it is reset
        # here on purpose *after* loading, so blocks that came from the
        # loaded configuration are not targeted by setOptions()
        self._last = None

    def setConfig(self, config):
        """Install ``config`` as the configuration dictionary.

        Raises:
            ValueError: if a non-empty configuration is already present.
        """
        if self._config:
            raise ValueError("Configuration has already been loaded.")
        self._config = config
        return

    # Less-than-ideal fix introduced in !76767
    def preprocessConfig(self, config, algs):
        """
        Preprocess the configuration dictionary (in place).

        A recognised block whose dictionary contains none of that block's
        own options (it only defines sub-blocks, or is empty) is replaced by
        a copy carrying an extra ``'__placeholder__': True`` entry, so that
        the block is no longer empty when it is merged. Only dictionary
        nodes are visited; lists of blocks are left untouched.
        """
        def processNode(node, algs):
            if not isinstance(node, dict):
                return  # Base case: not a dictionary
            # iterate over a snapshot: entries of `node` are replaced below
            for blockName, blockContent in list(node.items()):
                if blockName not in algs:
                    continue
                # If the block only defines sub-blocks, initialize it
                if isinstance(blockContent, dict) and not any(
                    key in algs[blockName].options for key in blockContent
                ):
                    node[blockName] = {'__placeholder__': True, **blockContent}
                # Recurse into sub-blocks
                processNode(node[blockName], algs[blockName].subAlgs)

        # Start processing from the root of the configuration
        processNode(config, algs)

    # Less-than-ideal fix introduced in !76767
    def cleanupPlaceholders(self, config):
        """
        Remove ``'__placeholder__'`` markers from ``config`` (in place).

        Recurses through nested dictionaries only; lists are not visited.
        """
        if not isinstance(config, dict):
            return
        if "__placeholder__" in config:
            del config["__placeholder__"]
        for value in config.values():
            self.cleanupPlaceholders(value)

    def loadConfig(self, yamlPath=None, *, configDict=None):
        """
        Read a YAML file (or take ``configDict``) and add its blocks.

        Will combine with any config blocks added using python. The input
        dictionary is modified while it is processed: sub-block entries are
        popped from their parent and placeholder markers are added/removed.

        Args:
            yamlPath: path of the YAML file to read; ignored when
                ``configDict`` is given.
            configDict: already-loaded configuration dictionary.

        Raises:
            NotImplementedError: if a configuration was already loaded or
                ``yamlPath`` is a list.
        """
        if self.__loadedYaml or isinstance(yamlPath, list):
            raise NotImplementedError("Mering multiple yaml files is not implemented.")
        self.__loadedYaml = True

        def merge(config, algs, path=''):
            """Add to config block-by-block"""
            if not isinstance(config, list):
                config = [config]
            # loop over list of blocks with same block name
            for blocks in config:
                # deal with case where empty dict is config; move on to the
                # next entry rather than returning, otherwise every entry
                # listed after an empty one would be silently dropped
                if blocks == {} and path:
                    self.addBlock(path)
                    continue
                # remove any subBlocks from block config
                subBlocks = {}
                for blockName in algs:
                    if blockName in blocks:
                        subBlocks[blockName] = blocks.pop(blockName)
                # anything left should be a block and its configuration
                if blocks:
                    self.addBlock(path, **blocks)
                # add in any subBlocks
                for subName, subBlock in subBlocks.items():
                    newPath = f'{path}.{subName}' if path else subName
                    merge(subBlock, algs[subName].subAlgs, newPath)
            return

        if configDict is not None:
            # if configDict is provided, use it directly
            logCPAlgTextCfg.info('loading configuration from dictionary')
            config = configDict
        else:
            logCPAlgTextCfg.info(f'loading {yamlPath}')
            config = readYaml(yamlPath)
        # check if blocks are defined in yaml file; they have to be
        # registered before the remaining configuration is interpreted
        if "AddConfigBlocks" in config:
            self._configureAlg(self._algs["AddConfigBlocks"], config["AddConfigBlocks"])

        # Preprocess the configuration dictionary (see !76767)
        self.preprocessConfig(config, self._algs)

        merge(config, self._algs)

        # Cleanup placeholders (see !76767)
        self.cleanupPlaceholders(config)

        return

    def printConfig(self, sort=False, jsonFormat=False):
        """Print the current configuration as YAML.

        Raises:
            ValueError: if the stored configuration is ``None``.
        """
        if self._config is None:
            raise ValueError("No configuration has been loaded.")
        printYaml(self._config, sort, jsonFormat)
        return

    def saveYaml(self, filePath='config.yaml', default_flow_style=False,
                 **kwargs):
        """
        Convert dictionary representation to yaml and save.

        Args:
            filePath: output file, opened for writing.
            default_flow_style: forwarded to ``yaml.dump``.
            **kwargs: further keyword arguments forwarded to ``yaml.dump``.
        """
        logCPAlgTextCfg.info(f"Saving configuration to {filePath}")
        with open(filePath, 'w') as outfile:
            # honour the caller's choice instead of a hard-coded False
            yaml.dump(self._config, outfile,
                      default_flow_style=default_flow_style, **kwargs)
        return

    def addBlock(self, name, **kwargs):
        """
        Create entry into dictionary representing the text configuration.

        ``name`` is a dot-separated path (``parent.child``); every parent
        must already exist. If an entry with the same name exists, the
        entries are collected in a list. The options dictionary of the new
        entry is remembered so that ``setOptions`` can update it.
        """
        def setEntry(name, config, opts):
            if '.' in name:
                # descend one level; for repeated parents use the latest one
                parent, _, rest = name.partition('.')
                config = config[parent]
                if isinstance(config, list):
                    config = config[-1]
                setEntry(rest, config, opts)
                return
            if name not in config:
                config[name] = opts
            elif isinstance(config[name], list):
                config[name].append(opts)
            else:
                config[name] = [config[name], opts]
            # set last added block for setOptionValue
            self._last = opts
            return

        setEntry(name, self._config, dict(kwargs))
        return

    def setOptions(self, **kwargs):
        """
        Set option(s) for the last block that was added. If an option
        was added previously, will update value.

        Raises:
            TypeError: if no block has been added yet.
        """
        if self._last is None:
            raise TypeError("Cannot set options before adding a block")
        # points to dict with opts for last added block
        self._last.update(**kwargs)

    def configure(self):
        """Process YAML configuration file and configure added algorithms.

        Returns:
            The ``ConfigSequence`` filled by configuring every root block
            present in the configuration, in the order given by
            ``self._order[self.ROOTNAME]``.

        Raises:
            ValueError: if the configuration contains an unknown root block.
        """
        rootOrder = self._order[self.ROOTNAME]

        # make sure all blocks in yaml file are added (otherwise they would be ignored)
        for blockName in self._config:
            if blockName not in rootOrder:
                if not blockName:
                    # unknown options were stored under the empty name;
                    # report the first of them
                    blockName = list(self._config[blockName].keys())[0]
                raise ValueError(f"Unkown block {blockName} in yaml file")

        # configure blocks
        configSeq = ConfigSequence()
        for blockName in rootOrder:
            if blockName == "AddConfigBlocks":
                continue

            assert blockName in self._algs

            # order only applies to root blocks
            if blockName in self._config:
                self._configureAlg(self._algs[blockName],
                                   self._config[blockName], configSeq)
        return configSeq

    def _addNewConfigBlocks(self, modulePath, functionName,
                            algName, defaults=None, pos=None, superBlocks=None):
        """
        Load <functionName> from <modulePath> and register it as block
        <algName>.

        Raises:
            ModuleNotFoundError: if ``modulePath`` cannot be imported.
        """
        try:
            module = importlib.import_module(modulePath)
            fxn = getattr(module, functionName)
        except ModuleNotFoundError as e:
            # keep the original error as the cause for easier debugging
            raise ModuleNotFoundError(f"{e}\nFailed to load {functionName} from {modulePath}") from e
        else:
            # NOTE(review): registers the loaded object in sys.modules under
            # the function name; the reason is not visible in this file
            sys.modules[functionName] = fxn
        # add new algorithm to available algorithms
        self.addAlgConfigBlock(algName=algName, alg=fxn,
                               defaults=defaults,
                               superBlocks=superBlocks,
                               pos=pos)
        return

    def _configureAlg(self, block, blockConfig, configSeq=None, containerName=None,
                      extraOptions=None):
        """Configure ``block`` (and, recursively, its sub-blocks).

        Args:
            block: block definition; ``algName``, ``makeConfig`` and
                ``subAlgs`` are used here.
            blockConfig: options dictionary, or list of such dictionaries,
                for this block. The dictionaries may get a
                ``containerName`` entry added.
            configSeq: sequence the created config is appended to, if given.
            containerName: container name inherited from the parent block.
            extraOptions: special options inherited from the parent block.

        Returns:
            ``configSeq``.

        Raises:
            ValueError: if an option is neither used by the block nor the
                name of one of its sub-blocks.
        """
        if not isinstance(blockConfig, list):
            blockConfig = [blockConfig]

        # NOTE(review): containerName and extraOptions are re-bound inside
        # the loop, so a later list entry sees the values picked up by an
        # earlier one - confirm this is intended.
        for options in blockConfig:
            # Special case: propagate containerName down to subAlgs
            if 'containerName' in options:
                containerName = options['containerName']
            elif containerName is not None:
                options['containerName'] = containerName
            # will check which options are associated alg and not options
            logCPAlgTextCfg.info(f"Configuring {block.algName}")
            seq, funcOpts = block.makeConfig(options)
            if not seq._blocks:
                continue
            algOpts = seq.setOptions(options)
            # If containerName was not set explicitly, we can now retrieve
            # its default value
            if containerName is None:
                for opt in algOpts:
                    if 'name' in opt and opt['name'] == 'containerName':
                        containerName = opt.get('value', None)
                        break  # Exit the loop as we've found the key

            if configSeq is not None:
                configSeq += seq

            # propagate special extra options to subalgs
            if extraOptions is None:
                extraOptionsList = ["skipOnData", "skipOnMC", "onlyForDSIDs"]
                for i in algOpts:
                    if i['name'] in extraOptionsList and i['defaultValue'] != i['value']:
                        if extraOptions is None:
                            extraOptions = {}
                        extraOptions[i['name']] = i['value']
            else:
                # a copy is passed on, keeping the inherited dict unchanged
                algOpts = seq.setOptions(extraOptions.copy())

            # check to see if there are unused parameters
            algOpts = [i['name'] for i in algOpts]
            expectedOptions = set(funcOpts)
            expectedOptions |= set(algOpts)
            expectedOptions |= set(block.subAlgs)

            difference = set(options.keys()) - expectedOptions
            # marker added by preprocessConfig, not a user option
            difference.discard('__placeholder__')
            if difference:
                difference = "\n".join(difference)
                raise ValueError(f"There are options set that are not used for "
                                 f"{block.algName}:\n{difference}\n"
                                 "Please check your configuration.")

            # check for sub-blocks and call this function recursively
            for alg in self._order.get(block.algName, []):
                if alg in options:
                    subAlg = block.subAlgs[alg]
                    self._configureAlg(subAlg, options[alg], configSeq, containerName, extraOptions)
        return configSeq
319 
320 
def makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False):
    """
    Read the text configuration at ``configPath``, configure all blocks and
    run the full configuration through a ``ConfigAccumulator``.

    ``autoconfigFromFlags`` is the deprecated spelling of ``flags``; passing
    both raises ``ValueError``, passing neither emits a deprecation warning.
    ``noPhysliteBroken`` is accepted but not used in this function.

    Returns ``configAccumulator.CA`` when running in Athena with the
    component accumulator enabled, otherwise ``None``.
    """

    # Historically we have used the identifier
    # `autoconfigFromFlags`, but in the rest of the code base
    # `flags` is used. So for now we allow either, and can hopefully
    # at some point remove the former (21 Aug 25).
    if autoconfigFromFlags is None:
        if flags is None:
            warnings.warn ('it is deprecated to configure meta-data for analysis configuration manually, please read the configuration flags via the meta-data reader', category=deprecationWarningCategory, stacklevel=2)
    else:
        if flags is not None:
            raise ValueError("Cannot pass both flags and autoconfigFromFlags arguments")
        flags = autoconfigFromFlags
        warnings.warn ('Using autoconfigFromFlags parameter is deprecated, use flags instead', category=deprecationWarningCategory, stacklevel=2)

    from AnalysisAlgorithmsConfig.ConfigAccumulator import ConfigAccumulator

    textConfig = TextConfig(configPath)

    logCPAlgTextCfg.info("Configuration file read in:")
    textConfig.printConfig()

    logCPAlgTextCfg.info("Default algorithms:")
    textConfig.printAlgs(printOpts=True)

    logCPAlgTextCfg.info("Configuring algorithms based on YAML file:")
    sequence = textConfig.configure()

    # defaults are added to config as algs are configured
    logCPAlgTextCfg.info("Configuration used:")
    textConfig.printConfig()

    # compile
    accumulator = ConfigAccumulator(
        algSeq=algSeq,
        dataType=dataType,
        isPhyslite=isPhyslite,
        geometry=geometry,
        autoconfigFromFlags=autoconfigFromFlags,
        flags=flags,
        noSystematics=noSystematics,
    )
    sequence.fullConfigure(accumulator)

    # blocks can be reordered during configSeq.fullConfigure
    logCPAlgTextCfg.info("ConfigBlocks and their configuration:")
    sequence.printOptions()

    from AnaAlgorithm.DualUseConfig import isAthena, useComponentAccumulator
    return accumulator.CA if (isAthena and useComponentAccumulator) else None
367 
368 
369 # Combine configuration files
370 #
371 # See the README for more info on how this works
372 #
def combineConfigFiles(local, config_path, fragment_key="include"):
    """
    Resolve `fragment_key` entries anywhere inside `local`, in place.

    - Nested dicts and lists are processed first.
    - A string/path value names one fragment (a warning is issued: a list
      is the expected form); a list value names several fragments.
    - Fragments are merged in list order, so on conflicts between
      fragments the **earlier** file wins.
    - Keys already present in `local` override whatever the fragments say.

    Returns True if any merging happened at or below this node.
    """
    # Only containers can hold (or contain nodes holding) include keys
    if isinstance(local, dict):
        children = local.values()
    elif isinstance(local, list):
        children = local
    else:
        return False

    # Resolve nested nodes before looking at this one
    merged_below = False
    for child in children:
        if combineConfigFiles(child, config_path, fragment_key=fragment_key):
            merged_below = True

    # Nothing to include here: lists cannot carry the key, dicts may lack it
    if not isinstance(local, dict) or fragment_key not in local:
        return merged_below

    # Normalize to a list of paths
    requested = local[fragment_key]
    if isinstance(requested, list):
        paths = requested
    elif isinstance(requested, (str, pathlib.Path)):
        warnings.warn(
            f"{fragment_key} should be followed with a list of files",
            TextConfigWarning,
            stacklevel=2,
        )
        paths = [requested]
    else:
        raise TypeError(f"'{fragment_key}' must be a string path or a list of paths, got {type(requested).__name__}")

    # Collect all fragments; entries merged first keep their values
    collected = {}
    for entry in paths:
        located = _find_fragment(pathlib.Path(entry), config_path)
        content = _load_fragment(located)

        # Fragments may include further fragments, relative to their own directory
        combineConfigFiles(content, located.parent, fragment_key=fragment_key)

        _merge_dicts(collected, content)

    # Drop the key first so it is not processed again
    del local[fragment_key]

    # Local values take precedence over everything collected
    _merge_dicts(local, collected)

    return True
440 
441 
442 def _load_fragment(fragment_path: pathlib.Path):
443  """Load a YAML or JSON fragment
444 
445  This function is superfluous as of the yaml 1.2 spec (which
446  has not been implemented in ATLAS Yaml dependencies).
447  Once https://github.com/yaml/pyyaml/issues/173 is resolved
448  pyyaml will support yaml 1.2, which is compatable with json.
449  Until then yaml and json behave differently in some scientific
450  notation edge cases.
451  """
452 
453  with open(fragment_path, 'r') as fragment_file:
454  if fragment_path.suffix.lower() == '.json':
455  return json.load(fragment_file)
456  else:
457  return yaml.safe_load(fragment_file)
458 
459 def _find_fragment(fragment_path, config_path):
460  paths_to_check = [
461  fragment_path,
462  config_path / fragment_path,
463  *[x / fragment_path for x in os.environ["DATAPATH"].split(":")]
464  ]
465  for path in paths_to_check:
466  if path.exists():
467  return path
468 
469  raise FileNotFoundError(fragment_path)
470 
471 
472 def _merge_dicts(local, fragment):
473  # In the list case append the fragment to the local list
474  if isinstance(local, list):
475  local += fragment
476  return
477  # In the dict case, append only missing values to local: the local
478  # values take precedence over the fragment ones.
479  if isinstance(local, dict):
480  for key, value in fragment.items():
481  if key in local:
482  _merge_dicts(local[key], value)
483  else:
484  local[key] = value
485  return
python.ConfigText.TextConfig
Definition: ConfigText.py:38
python.ConfigText.combineConfigFiles
def combineConfigFiles(local, config_path, fragment_key="include")
Definition: ConfigText.py:373
python.ConfigText.TextConfig.saveYaml
def saveYaml(self, filePath='config.yaml', default_flow_style=False, **kwargs)
Definition: ConfigText.py:165
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
python.ConfigText.TextConfig.cleanupPlaceholders
def cleanupPlaceholders(self, config)
Definition: ConfigText.py:93
python.ConfigText.readYaml
def readYaml(yamlPath)
Definition: ConfigText.py:21
python.ConfigText.TextConfig._configureAlg
def _configureAlg(self, block, blockConfig, configSeq=None, containerName=None, extraOptions=None)
Definition: ConfigText.py:260
python.ConfigText.TextConfig.configure
def configure(self)
Definition: ConfigText.py:213
python.ConfigText.TextConfig._config
_config
Definition: ConfigText.py:52
python.ConfigText.TextConfig.setOptions
def setOptions(self, **kwargs)
Definition: ConfigText.py:202
python.ConfigText._find_fragment
def _find_fragment(fragment_path, config_path)
Definition: ConfigText.py:459
python.ConfigText.printYaml
def printYaml(d, sort=False, jsonFormat=False)
Definition: ConfigText.py:30
python.ConfigText.TextConfigWarning
Definition: ConfigText.py:34
python.ConfigText.TextConfig.__init__
def __init__(self, yamlPath=None, *, config=None, addDefaultBlocks=True)
Definition: ConfigText.py:39
python.ConfigText.TextConfig.__loadedYaml
__loadedYaml
Definition: ConfigText.py:54
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:26
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.ConfigText.TextConfig.preprocessConfig
def preprocessConfig(self, config, algs)
Definition: ConfigText.py:69
Trk::open
@ open
Definition: BinningType.h:40
python.ConfigText.makeSequence
def makeSequence(configPath, *, flags=None, algSeq=None, noSystematics=None, dataType=None, geometry=None, autoconfigFromFlags=None, isPhyslite=None, noPhysliteBroken=False)
Definition: ConfigText.py:321
python.ConfigText._load_fragment
def _load_fragment(fragment_path: pathlib.Path)
Definition: ConfigText.py:442
get
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition: hcg.cxx:127
python.utility.LHE.merge
def merge(input_file_pattern, output_file)
Merge many input LHE files into a single output file.
Definition: LHE.py:29
python.ConfigText._merge_dicts
def _merge_dicts(local, fragment)
Definition: ConfigText.py:472
python.ConfigText.TextConfig.loadConfig
def loadConfig(self, yamlPath=None, *, configDict=None)
Definition: ConfigText.py:104
python.ConfigText.TextConfig.setConfig
def setConfig(self, config)
Definition: ConfigText.py:61
python.Bindings.keys
keys
Definition: Control/AthenaPython/python/Bindings.py:801
python.ConfigText.TextConfig.printConfig
def printConfig(self, sort=False, jsonFormat=False)
Definition: ConfigText.py:157
python.ConfigText.TextConfig.addBlock
def addBlock(self, name, **kwargs)
Definition: ConfigText.py:177
python.ConfigText.TextConfig._addNewConfigBlocks
def _addNewConfigBlocks(self, modulePath, functionName, algName, defaults=None, pos=None, superBlocks=None)
Definition: ConfigText.py:240
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
merge
Definition: merge.py:1
python.ConfigText.TextConfig._last
_last
Definition: ConfigText.py:58