ATLAS Offline Software
Loading...
Searching...
No Matches
CPBaseRunner.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3import argparse
4from AnaAlgorithm.Logging import logging
5from abc import ABC, abstractmethod
6from os import environ, pathsep
7from pathlib import Path
8
9
10class CPBaseRunner(ABC):
11 def __init__(self):
12 self.logger = logging.getLogger("CPBaseRunner")
13 self._args = None
14 self._inputList = None
16 self.rawConfig = None
17 # parse the arguments here is a bad idea
18
@property
def args(self):
    """Return the parsed command-line arguments, parsing on first access.

    The namespace produced by ``self.parser.parse_args()`` is stored in
    ``self._args`` and reused on every later access.
    """
    cached = self._args
    if cached is not None:
        return cached
    self._args = self.parser.parse_args()
    return self._args
24
@property
def inputList(self):
    """Return the list of input files, resolving it on first access.

    The ``-i/--input-list`` argument is interpreted as follows:

    * a value ending in ``.txt`` is parsed with ``_parseInputFileList``;
    * a value containing ``.root`` anywhere (so ``file.root.1`` also
      qualifies) is used as a single input file;
    * anything else, including a missing value, is rejected.

    The result is cached in ``self._inputList``.

    Raises:
        FileNotFoundError: if no input was given or its form is unsupported.
    """
    if self._inputList is None:
        requested = self.args.input_list
        # argparse leaves the option at None when -i is not passed; report
        # that as a usage error instead of crashing on None.endswith().
        if not requested:
            raise FileNotFoundError(
                'No input was given! '
                'Please provide a text file with a list of input files or a single root file.')
        if requested.endswith('.txt'):
            self._inputList = CPBaseRunner._parseInputFileList(Path(requested))
        elif ".root" in requested:
            self._inputList = [requested]
        else:
            # Trailing space keeps the two sentences apart after the
            # implicit string-literal concatenation.
            raise FileNotFoundError(
                f'Input file list "{requested}" is not supported! '
                'Please provide a text file with a list of input files or a single root file.')
        self.logger.info("Initialized input files: %s", self._inputList)
    return self._inputList
38
@property
def outputName(self):
    """Return the ``--output-name`` value without a trailing ``.root``.

    Only one trailing ``.root`` is removed; a name without that suffix is
    returned unchanged.
    """
    return self.args.output_name.removesuffix('.root')
45
def printFlags(self):
    """Log a banner followed by a summary of selected job flags.

    Reads ``isMC``, ``RunNumbers``, ``MCCampaign`` and ``GeneratorsInfo``
    from ``self.flags.Input`` and ``MaxEvents`` / ``SkipEvents`` from
    ``self.flags.Exec``, emitting one INFO line per value.
    """
    emit = self.logger.info
    rule = "=" * 73

    emit(rule)
    emit("=" * 20 + "FLAG CONFIGURATION" + "=" * 20)
    emit(rule)

    inputFlags = self.flags.Input
    # NOTE(review): the label says "Input files" but the value logged is
    # Input.isMC -- confirm which of the two was intended.
    emit(" Input files: %s", inputFlags.isMC)
    emit(" RunNumber: %s", inputFlags.RunNumbers)
    emit(" MCCampaign: %s", inputFlags.MCCampaign)
    emit(" GeneratorInfo: %s", inputFlags.GeneratorsInfo)

    execFlags = self.flags.Exec
    emit(" MaxEvents: %s", execFlags.MaxEvents)
    emit(" SkipEvents: %s", execFlags.SkipEvents)
    emit(rule)
60
61 @abstractmethod
63 pass
64
@abstractmethod
def makeAlgSequence(self):
    """Abstract: concrete runners must override this method.

    The base implementation does nothing and returns ``None``.
    """
68
@abstractmethod
def run(self):
    """Abstract: concrete runners must override this method.

    The base implementation does nothing and returns ``None``.
    """
72
73 # The responsibility for locking the flags (flags.lock) passes to the caller
75 from AthenaConfiguration.AllConfigFlags import initConfigFlags
76 flags = initConfigFlags()
77 flags.Input.Files = self.inputList
78 flags.Exec.MaxEvents = self.args.max_events
79 flags.Exec.SkipEvents = self.args.skip_n_events
80 flags.Exec.EventPrintoutInterval = 10000
81 return flags
82
84 parser = argparse.ArgumentParser(
85 description='Runscript for CP Algorithm unit tests')
86 baseGroup = parser.add_argument_group('Base Script Options')
87 baseGroup.add_argument('-i', '--input-list', dest='input_list',
88 help='path to text file containing list of input files, or a single root file')
89 baseGroup.add_argument('-o', '--output-name', dest='output_name', default='output',
90 help='output name of the analysis root file')
91 baseGroup.add_argument('-e', '--max-events', dest='max_events', type=int, default=-1,
92 help='Number of events to run')
93 baseGroup.add_argument('-t', '--text-config', dest='text_config',
94 help='path to the YAML configuration file. Tips: use atlas_install_data(path/to/*.yaml) in CMakeLists.txt can help locating the config just by the config file name.')
95 baseGroup.add_argument('--no-systematics', dest='no_systematics',
96 action='store_true', help='Disable systematics')
97 baseGroup.add_argument('--skip-n-events', dest='skip_n_events', type=int, default=0,
98 help='Skip the first N events in the run, not first N events for each file. This is meant for debugging only. \nIn Eventloop, this option disable the cutbookkeeper algorithms due to technical reasons, and can only be ran in direct-driver.')
99 baseGroup.add_argument('--merge-output-files', dest='merge_output_files', action='store_true', help='Merge the output histogram and n-tuple files into a single file.')
100 return parser
101
def _mergeYamlconfig(self, yaml_path: Path, yaml_paths: list[Path]):
    """Load the main YAML config and process its ``include`` fragments.

    Args:
        yaml_path: main YAML configuration file, read as UTF-8.
        yaml_paths: paths forwarded unchanged to ``combineConfigFiles``
            (presumably the locations searched for included fragments --
            confirm against AnalysisAlgorithmsConfig.ConfigText).

    Returns:
        A ``(config_data, combined)`` tuple: the loaded configuration and
        the value returned by ``combineConfigFiles``.

    Side effects:
        When ``combined`` is truthy, the configuration is dumped to
        ``merged_config.yaml`` in the current working directory.
    """
    with yaml_path.open("r", encoding="utf-8") as mainFile:
        import yaml
        config_data = yaml.safe_load(mainFile)

    from AnalysisAlgorithmsConfig.ConfigText import combineConfigFiles
    combined = combineConfigFiles(config_data, yaml_paths, fragment_key="include")
    if not combined:
        return config_data, combined

    # config_data is dumped after combineConfigFiles has run; presumably the
    # helper merges the fragments into it in place -- TODO confirm.
    with open("merged_config.yaml", "w") as mergedFile:
        mergedFile.write(yaml.dump(config_data))
    self.logger.info("Merged included fragments into main config.")
    return config_data, combined
113
115 yamlConfig, yamlPaths = self._findYamlConfig(local=True)
116 if yamlConfig is None:
117 raise FileNotFoundError(f'Failed to locate \"{self.args.text_config}\" config file!'
118 'Check if you have a typo in -t/--text-config argument or missing file in the analysis configuration sub-directory.')
119 self.logger.info(f"Found YAML config at: {yamlConfig}")
120 self.logger.info("Setting up configuration based on YAML config:")
121
122 from AnalysisAlgorithmsConfig.ConfigText import TextConfig
123 self.rawConfig, merged = self._mergeYamlconfig(yamlConfig, yamlPaths)
124 self.modifyYamlConfig()
125 config = TextConfig(config=self.rawConfig)
126 return config
127
128 def _findYamlConfig(self, local=True):
129 # Find local and abs path first
130 if local and ((yamlConfig := CPBaseRunner.findLocalPathYamlConfig(self.args.text_config)) is not None):
131 return yamlConfig, [yamlConfig.parent]
132 # Then search in the analysis repository and warn for duplicates
133 else:
134 yamlConfig, yamlBasePath = CPBaseRunner.findRepoPathYamlConfig(self.args.text_config)
135 # Try the slowest method using AthenaCommon if nothing found
136 if not yamlConfig:
137 from AthenaCommon.Utils.unixtools import find_datafile
138 return find_datafile(self.args.text_config)
139
140 if len(yamlConfig) > 1:
141 raise FileExistsError(
142 f'Multiple files named \"{self.args.text_config}\" found in the analysis repository. Please provide a more specific path to the config file.\nMatches found:\n' + '\n'.join(yamlConfig))
143 else:
144 return yamlConfig[0], [yamlBasePath[0]]
145
146 @staticmethod
147 def findLocalPathYamlConfig(textConfigPath):
148 configPath = Path(textConfigPath).expanduser()
149 if configPath.is_absolute() and configPath.is_file():
150 return configPath
151 cwdPath = Path.cwd() / configPath
152 if cwdPath.is_file():
153 return cwdPath
154 return None
155
156 @staticmethod
157 def findRepoPathYamlConfig(textConfigPath):
158 """
159 Search for the file up to two levels deep within the first DATAPATH entry.
160 First, check directly under the analysis repository (depth 0).
161 Then, check immediate subdirectories (depth 1), looking for the file inside each.
162 Returns a list of all matches found and a list of packages/base paths.
163 """
164 matches = []
165 basePaths = []
166 analysisRepoPath = Path(environ.get("DATAPATH", "").split(pathsep)[0])
167 # Depth 0: Directly under analysisRepoPath
168 searchPath = analysisRepoPath / textConfigPath
169 if searchPath.is_file():
170 matches.append(searchPath)
171 basePaths.append(analysisRepoPath if searchPath.parent == analysisRepoPath else analysisRepoPath / Path(textConfigPath).parts[0])
172 # Depth 1: Inside immediate subdirectories
173 for subdir in analysisRepoPath.iterdir():
174 if not subdir.is_dir():
175 continue
176 candidate = analysisRepoPath / subdir / textConfigPath
177 if candidate.is_file():
178 matches.append(candidate)
179 basePaths.append(analysisRepoPath / subdir)
180 return matches, basePaths
181
182 def _parseInputFileList(path: Path):
183 files = []
184 with path.open('r') as inputText:
185 for line in inputText.readlines():
186 # Strip the line and skip comments and empty lines
187 line = line.strip()
188 if line.startswith('#') or not line:
189 continue
190 # On grid, input files are listed in one line separated by a comma
191 if ',' in line:
192 files += line.split(',')
193 continue
194 line_path = Path(line)
195 if line_path.is_dir():
196 if not any(line_path.iterdir()):
197 raise FileNotFoundError(
198 f"The directory \"{path}\" is empty. Please provide a directory with .root files.")
199 for root_file in line_path.iterdir():
200 if root_file.suffix == '.root':
201 files.append(str(root_file))
202 # This also catches grid input with only one file
203 else:
204 files.append(line)
205
206 # Remove leading/trailing whitespaces from file names
207 files = [file.strip() for file in files]
208 return files
209
210 def setup(self):
212 self.parser.parse_args()
215
217 self.parser.description = 'CPRunScript available arguments'
218 self.parser.usage = argparse.SUPPRESS
219 self.parser.print_help()
220
221 # Three customization hooks are run in the order below:
222 # First: modify parser arguments; has access to self.parser and self.flags (Athena flags or EL flags)
223 # Second: modify the YAML config; has access to self.rawConfig, self.flags, self.parser, and self.config
224 # Third: modify the algorithm sequence; has access to self.flags, self.config, self.args, self.parser, and self.algSeq / self.configSeq
def modifyParserArguments(self):  # noqa: B027
    """Customization hook for adjusting the argument parser; no-op by default.

    Example override body:
        self.parser.add_argument('--no-filter', dest='no_filter', action='store_true', help='Disable filtering')

    The method is intentionally concrete and empty so that overriding it is
    optional; the ``noqa`` marker silences the linter's empty-method
    warning (B027).
    """
229
def modifyYamlConfig(self):  # noqa: B027
    """Customization hook for editing the loaded YAML config; no-op by default.

    Example override body:
        self.rawConfig['SomeSection']['SomeOption'] = some_value

    The method is intentionally concrete and empty so that overriding it is
    optional; the ``noqa`` marker silences the linter's empty-method
    warning (B027).
    """
234
def modifyAlgSequence(self):  # noqa: B027
    """Customization hook for adjusting the algorithm sequence; no-op by default.

    Example override bodies:
        For AthAnalysis: self.configSeq.some_attribute = some_value
        For EventLoop: self.algSeq.some_attribute = some_value

    The method is intentionally concrete and empty so that overriding it is
    optional; the ``noqa`` marker silences the linter's empty-method
    warning (B027).
    """
_mergeYamlconfig(self, Path yaml_path, list[Path] yaml_paths)
findLocalPathYamlConfig(textConfigPath)
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177