ATLAS Offline Software
Loading...
Searching...
No Matches
CPBaseRunner.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3import argparse
4from AnaAlgorithm.Logging import logging
5from abc import ABC, abstractmethod
6from os import environ, pathsep
7from pathlib import Path
8
9
10class CPBaseRunner(ABC):
    def __init__(self):
        """Create the logger and reset the lazily-filled state of the runner."""
        self.logger = logging.getLogger("CPBaseRunner")
        # Caches behind the `args` and `inputList` properties; None means
        # "not computed yet".
        self._args = None
        self._inputList = None
        # NOTE(review): other methods read self.parser, but no assignment to it
        # is visible in this constructor -- confirm where the parser is created.
        # Filled with the loaded YAML configuration once the config is read.
        self.rawConfig = None
        # Do not parse the command line here (flagged as a bad idea by the
        # original author); parsing happens lazily in the `args` property.
18
19 @property
20 def args(self):
21 if self._args is None:
22 self._args = self.parser.parse_args()
23 return self._args
24
25 @property
26 def inputList(self):
27 if self._inputList is None:
28 if self.args.input_list.endswith('.txt'):
29 self._inputList = CPBaseRunner._parseInputFileList(
30 Path(self.args.input_list))
31 elif ".root" in self.args.input_list:
32 self._inputList = [self.args.input_list]
33 else:
34 raise FileNotFoundError(f'Input file list \"{self.args.input_list}\" is not supported!'
35 'Please provide a text file with a list of input files or a single root file.')
36 self.logger.info("Initialized input files: %s", self._inputList)
37 return self._inputList
38
39 @property
40 def outputName(self):
41 if self.args.output_name.endswith('.root'):
42 return self.args.output_name[:-5]
43 else:
44 return self.args.output_name
45
46 def printFlags(self):
47 self.logger.info("="*73)
48 self.logger.info("="*20 + "FLAG CONFIGURATION" + "="*20)
49 self.logger.info("="*73)
50 self.logger.info(" Input files: %s", self.flags.Input.isMC)
51 self.logger.info(" RunNumber: %s",
52 self.flags.Input.RunNumbers)
53 self.logger.info(" MCCampaign: %s",
54 self.flags.Input.MCCampaign)
55 self.logger.info(" GeneratorInfo: %s",
56 self.flags.Input.GeneratorsInfo)
57 self.logger.info(" MaxEvents: %s", self.flags.Exec.MaxEvents)
58 self.logger.info(" SkipEvents: %s", self.flags.Exec.SkipEvents)
59 self.logger.info("="*73)
60
61 @abstractmethod
63 pass
64
65 @abstractmethod
66 def makeAlgSequence(self):
67 pass
68
69 @abstractmethod
70 def run(self):
71 pass
72
    # Responsibility for calling flags.lock() is passed on to the caller
75 from AthenaConfiguration.AllConfigFlags import initConfigFlags
76 flags = initConfigFlags()
77 flags.Input.Files = self.inputList
78 flags.Exec.MaxEvents = self.args.max_events
79 flags.Exec.SkipEvents = self.args.skip_n_events
80 flags.Exec.EventPrintoutInterval = 10000
81 return flags
82
84 parser = argparse.ArgumentParser(
85 description='Runscript for CP Algorithm unit tests')
86 baseGroup = parser.add_argument_group('Base Script Options')
87 baseGroup.add_argument('-i', '--input-list', dest='input_list',
88 help='path to text file containing list of input files, or a single root file')
89 baseGroup.add_argument('-o', '--output-name', dest='output_name', default='output',
90 help='output name of the analysis root file')
91 baseGroup.add_argument('-e', '--max-events', dest='max_events', type=int, default=-1,
92 help='Number of events to run')
93 baseGroup.add_argument('-t', '--text-config', dest='text_config',
94 help='path to the YAML configuration file. Tips: use atlas_install_data(path/to/*.yaml) in CMakeLists.txt can help locating the config just by the config file name.')
95 baseGroup.add_argument('--no-systematics', dest='no_systematics',
96 action='store_true', help='Disable systematics')
97 baseGroup.add_argument('--skip-n-events', dest='skip_n_events', type=int, default=0,
98 help='Skip the first N events in the run, not first N events for each file. This is meant for debugging only. \nIn Eventloop, this option disable the cutbookkeeper algorithms due to technical reasons, and can only be ran in direct-driver.')
99 baseGroup.add_argument('--merge-output-files', dest='merge_output_files', action='store_true', help='Merge the output histogram and n-tuple files into a single file.')
100 return parser
101
102 def _mergeYamlconfig(self, yaml_path: Path, yaml_paths: list[Path]):
103 with yaml_path.open("r", encoding="utf-8") as cfg_file:
104 import yaml
105 config_data = yaml.safe_load(cfg_file)
106 from AnalysisAlgorithmsConfig.ConfigText import combineConfigFiles
107 combined = combineConfigFiles(config_data, yaml_paths, fragment_key="include")
108 if combined:
109 with open("merged_config.yaml", "w") as cfg:
110 cfg.write(yaml.dump(config_data))
111 self.logger.info("Merged included fragments into main config.")
112 return config_data, combined
113
115 yamlConfig, yamlPaths = self._findYamlConfig(local=True)
116 if yamlConfig is None:
117 raise FileNotFoundError(f'Failed to locate \"{self.args.text_config}\" config file!'
118 'Check if you have a typo in -t/--text-config argument or missing file in the analysis configuration sub-directory.')
119 self.logger.info(f"Found YAML config at: {yamlConfig}")
120 self.logger.info("Setting up configuration based on YAML config:")
121
122 from AnalysisAlgorithmsConfig.ConfigText import TextConfig
123 self.rawConfig, merged = self._mergeYamlconfig(yamlConfig, yamlPaths)
124 self.modifyYamlConfig()
125 config = TextConfig(config=self.rawConfig)
126 return config
127
128 def _findYamlConfig(self, local=True):
129 # Find local and abs path first
130 if local and ((yamlConfig := CPBaseRunner.findLocalPathYamlConfig(self.args.text_config)) is not None):
131 return yamlConfig, [yamlConfig.parent if yamlConfig is not None else None]
132 # Then search in the analysis repository and warn for duplicates
133 else:
134 yamlConfig, yamlBasePath = CPBaseRunner.findRepoPathYamlConfig(self.args.text_config)
135 # Try the slowest method using AthenaCommon if nothing found
136 if not yamlConfig:
137 from AthenaCommon.Utils.unixtools import find_datafile
138 yamlConfig = find_datafile(self.args.text_config)
139 if yamlConfig is None:
140 return None, None
141 yamlConfig = Path(yamlConfig)
142 return yamlConfig, [yamlConfig.parent]
143
144 if len(yamlConfig) > 1:
145 raise FileExistsError(
146 f'Multiple files named \"{self.args.text_config}\" found in the analysis repository. Please provide a more specific path to the config file.\nMatches found:\n' + '\n'.join(yamlConfig))
147 else:
148 return yamlConfig[0], [yamlBasePath[0]]
149
150 @staticmethod
151 def findLocalPathYamlConfig(textConfigPath):
152 configPath = Path(textConfigPath).expanduser()
153 if configPath.is_absolute() and configPath.is_file():
154 return configPath
155 cwdPath = Path.cwd() / configPath
156 if cwdPath.is_file():
157 return cwdPath
158 return None
159
160 @staticmethod
161 def findRepoPathYamlConfig(textConfigPath):
162 """
163 Search for the file up to two levels deep within the first DATAPATH entry.
164 First, check directly under the analysis repository (depth 0).
165 Then, check immediate subdirectories (depth 1), looking for the file inside each.
166 Returns a list of all matches found and a list of packages/base paths.
167 """
168 matches = []
169 basePaths = []
170 analysisRepoPath = Path(environ.get("DATAPATH", "").split(pathsep)[0])
171 # Depth 0: Directly under analysisRepoPath
172 searchPath = analysisRepoPath / textConfigPath
173 if searchPath.is_file():
174 matches.append(searchPath)
175 basePaths.append(analysisRepoPath if searchPath.parent == analysisRepoPath else analysisRepoPath / Path(textConfigPath).parts[0])
176 # Depth 1: Inside immediate subdirectories
177 for subdir in analysisRepoPath.iterdir():
178 if not subdir.is_dir():
179 continue
180 candidate = analysisRepoPath / subdir / textConfigPath
181 if candidate.is_file():
182 matches.append(candidate)
183 basePaths.append(analysisRepoPath / subdir)
184 return matches, basePaths
185
186 def _parseInputFileList(path: Path):
187 files = []
188 with path.open('r') as inputText:
189 for line in inputText.readlines():
190 # Strip the line and skip comments and empty lines
191 line = line.strip()
192 if line.startswith('#') or not line:
193 continue
194 # On grid, input files are listed in one line separated by a comma
195 if ',' in line:
196 files += line.split(',')
197 continue
198 line_path = Path(line)
199 if line_path.is_dir():
200 if not any(line_path.iterdir()):
201 raise FileNotFoundError(
202 f"The directory \"{path}\" is empty. Please provide a directory with .root files.")
203 for root_file in line_path.iterdir():
204 if '.root' in root_file.suffixes:
205 files.append(str(root_file))
206 # This also catches grid input with only one file
207 else:
208 files.append(line)
209
210 # Remove leading/trailing whitespaces from file names
211 files = [file.strip() for file in files]
212 if not files:
213 raise FileNotFoundError(
214 f"No valid input files found in \"{path}\"! Please check the input file list.")
215 return files
216
217 def setup(self):
219 self.parser.parse_args()
222
224 self.parser.description = 'CPRunScript available arguments'
225 self.parser.usage = argparse.SUPPRESS
226 self.parser.print_help()
227
    # Three customization hooks are run in the order below:
    # First: modify parser arguments; has access to self.parser and self.flags (Athena flags or EL flags)
    # Second: modify YAML config; has access to self.rawConfig, self.flags, self.parser, and self.config
    # Third: modify algorithm sequence; has access to self.flags, self.config, self.args, self.parser, and (self.algSeq / self.configSeq)
232 def modifyParserArguments(self): # noqa: B027
233 # Example: self.parser.add_argument('--no-filter', dest='no_filter', action='store_true', help='Disable filtering')
234 # The seemingly trivial log is to prevent CI from complaining about empty hook functions
235 pass
236
237 def modifyYamlConfig(self): # noqa: B027
238 # Example: self.rawConfig['SomeSection']['SomeOption'] = some_value
239 # The seemingly trivial log is to prevent CI from complaining about empty hook functions
240 pass
241
242 def modifyAlgSequence(self): # noqa: B027
243 # For AthAnalysis: self.configSeq.some_attribute = some_value
244 # For EventLoop: self.algSeq.some_attribute = some_value
245 # The seemingly trivial log is to prevent CI from complaining about empty hook functions
246 pass
_mergeYamlconfig(self, Path yaml_path, list[Path] yaml_paths)
findLocalPathYamlConfig(textConfigPath)
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179