ATLAS Offline Software
Loading...
Searching...
No Matches
python.update_ci_reference_files Namespace Reference

Classes

class  CITest

Functions

 process_log_file (url, branch, test_name)
 process_diffpool_change (text, ami_tag, mr_number, human_readable_date, test_name)
 process_digest_change (text, ami_tag, mr_number, human_readable_date, test_name)
 update_reference_files (actually_update=True, update_local_files=False)
 create_dir_and_copy_refs (test, actually_update=False)
 process_CI_Tests_json (data)
 strip_url (href)
 strip_href (href)
 process_CI_Builds_Summary (project)
 extract_links_from_json (url)
 summarise_failing_tests (check_for_duplicates=True)

Variables

 failing_tests = defaultdict(list)
list dirs_created = []
bool debug = False
 parser
 help
 action
 args = parser.parse_args()
 mr_url = summarise_failing_tests(not args.test_run)
 gl_project = gitlab.Gitlab("https://gitlab.cern.ch").projects.get("atlas/athena")
 mr = gl_project.mergerequests.get(mr_url.split('/')[-1])
 author = mr.author['username']
str remote = f'https://:@gitlab.cern.ch:8443/{author}/athena.git'
str local_branch = f'mr-{mr.iid}'
str msg = 'Would you like to (locally) update digest ref files and/or versions in References.py?'
bool update_local_files = False
 not_in_athena_dir = subprocess.call("git rev-parse --is-inside-work-tree", shell=True)
 commands = update_reference_files(not args.test_run, update_local_files)

Detailed Description

Updates reference files for a given MR, as well as related files (digest ref files, References.py)

This script should be run in the root directory of the athena repository, 
and you should pass in the URL of "CI Builds Summary" page for the MR you are interested in.
i.e. the link that you get from the MR under "Full details available on <this CI monitor view>"

So, for example, if you are interested in MR 63410, you would run this script as follows:
Tools/PROCTools/scripts/update_ci_reference_files.py https://bigpanda.cern.ch/ciview/?rel=MR-63410-2023-10-09-12-27

Running with --test-run will modify local files (so you can test that the changes make sense), and will also print out the commands which would have been executed. Nothing remote is changed! 
This is a good way to check that the proposed changes look rational before actually making them in earnest.

Function Documentation

◆ create_dir_and_copy_refs()

python.update_ci_reference_files.create_dir_and_copy_refs ( test,
actually_update = False )
If called with actually_update=False, this function will return a list of commands which would have been executed.

Definition at line 291 of file update_ci_reference_files.py.

def create_dir_and_copy_refs(test, actually_update=False):
    """
    Build, and optionally execute, the shell commands which publish new reference files.

    The commands create the new version directory (only the first time a given
    directory is seen, tracked via the module-level dirs_created list), copy the
    files produced by the MR into it, and then back-fill anything the MR did not
    touch from the existing reference directory.

    Args:
        test: the CITest being processed. The attributes new_version_directory,
            copied_file_path, existing_ref, mr, date, tag and name are read.
        actually_update: if True, run the commands and write an info.txt file
            recording the provenance of the copy. If False, nothing is executed.

    Returns:
        The list of shell command strings. If called with actually_update=False,
        these are the commands which would have been executed.
    """
    commands = []
    if test.new_version_directory not in dirs_created:
        commands.append("mkdir -p " + test.new_version_directory)
        dirs_created.append(test.new_version_directory)

    # Copy new directory first, then copy old (in case the new MR did not touch all files)
    # Important! Use no-clobber for second copy or we will overwrite the new data with old!
    commands.append("cp " + test.copied_file_path + "* " + test.new_version_directory + "/")
    commands.append("cp -n " + test.existing_ref + "/* " + test.new_version_directory + "/")

    if not actually_update:
        return commands

    print(' -> Copying files from {} to {}'.format(test.copied_file_path, test.new_version_directory))
    for command in commands:
        # The commands rely on shell globbing, hence shell=True.
        try:
            return_code = subprocess.call(command, shell=True)
        except Exception as e:
            print('Command failed due to:', e)
            print('Do you have EOS available on this machine?')
            continue
        # subprocess.call does not raise when the command itself fails, so the
        # exit status has to be checked explicitly. Remain best-effort: report
        # the problem and carry on with the remaining commands.
        if return_code != 0:
            print('Command failed due to: exit status {} from "{}"'.format(return_code, command))
            print('Do you have EOS available on this machine?')

    # Record where the new references came from; the context manager guarantees
    # the file is closed even if one of the writes fails.
    with open(test.new_version_directory + '/info.txt', 'w') as f:
        f.write('Merge URL: https://gitlab.cern.ch/atlas/athena/-/merge_requests/{}\n'.format(test.mr))
        f.write('Date: {}\n'.format(test.date))
        f.write('AMI: {}\n'.format(test.tag))
        f.write('Test name: {}\n'.format(test.name))
        f.write('Files copied from: {}\n'.format(test.copied_file_path))

    return commands
326
void print(char *figname, TCanvas *c1)

◆ extract_links_from_json()

python.update_ci_reference_files.extract_links_from_json ( url)

Definition at line 358 of file update_ci_reference_files.py.

def extract_links_from_json(url):
    """
    Fetch the JSON view of the page at url and process every project row in it.

    The JSON view is requested by appending '&json' to url. Each entry of
    "rows_s" after the first is handed to process_CI_Builds_Summary.
    """
    response = requests.get(url + '&json', headers={'Accept': 'application/json'})
    rows = response.json()["rows_s"]
    # First row is header.
    # Currently this is: 'Release', 'Platform', 'Project', 'git branch<BR>(link to MR)', 'Job time stamp', 'git clone', 'Externals build', 'CMake config', 'Build time', 'Comp. Errors (w/warnings)', 'Test time', 'CI tests errors (w/warnings)', 'Host'
    for project_row in rows[1:]:
        process_CI_Builds_Summary(project_row)
366

◆ process_CI_Builds_Summary()

python.update_ci_reference_files.process_CI_Builds_Summary ( project)

Definition at line 343 of file update_ci_reference_files.py.

def process_CI_Builds_Summary(project):
    """
    Inspect one row of the builds summary table and follow it up if tests failed.

    project is a list with one entry per table column; the entry at index 11
    holds the tests cell, which is a link of the form:
    <a href="/testsview/?nightly=MR-CI-builds&rel=MR-66303-2023-10-10-19-08&ar=x86_64-centos7-gcc112-opt&proj=AthGeneration">0 (0)</a>
    When the leading number of the link text is positive, the linked tests page
    is fetched as JSON and its rows (minus the header) are passed to
    process_CI_Tests_json.
    """
    tests_cell = project[11]
    # The link text is e.g. '0 (0)'; the first number is the error count.
    error_count = int(strip_href(tests_cell).split(' ')[0])
    if error_count <= 0:
        return

    # Okay, we have an error!
    tests_page_url = 'https://bigpanda.cern.ch' + strip_url(tests_cell)
    response = requests.get(tests_page_url + '&json', headers={'Accept': 'application/json'})
    process_CI_Tests_json(response.json()["rows_s"][1:])
357

◆ process_CI_Tests_json()

python.update_ci_reference_files.process_CI_Tests_json ( data)

Definition at line 327 of file update_ci_reference_files.py.

def process_CI_Tests_json(data):
    """
    Process the log file of every row whose first column contains 'ERROR'.

    Each row is a list with one entry per table column: index 1 is passed on as
    the branch, and index 2 is a link whose URL and text are passed on as the
    log URL and the test name respectively.
    """
    for row in data:
        if 'ERROR' not in row[0]:
            continue
        log_link = row[2]
        process_log_file(strip_url(log_link), branch=row[1], test_name=strip_href(log_link))
332

◆ process_diffpool_change()

python.update_ci_reference_files.process_diffpool_change ( text,
ami_tag,
mr_number,
human_readable_date,
test_name )

Definition at line 113 of file update_ci_reference_files.py.

def process_diffpool_change(text, ami_tag, mr_number, human_readable_date, test_name):
    """
    Build the CITest record for a failing DiffPool test from its log text.

    Two pieces of information are extracted from text:
      - the directory the CI copied the new output to (from the 'ERROR Copied' line)
      - the reference file currently in use (from the 'Reading the reference file
        from location' line), whose path components give the existing version
        ([-2]), the AMI tag ([-3]) and the branch ([-4]).
    The new version is the existing version number incremented by one.

    Args:
        text: content of the test log page.
        ami_tag: AMI tag extracted previously; cross-checked against the reference path.
        mr_number: merge request number.
        human_readable_date: date string to store with the test.
        test_name: name of the failing test.

    Returns:
        A CITest of type 'DiffPool'.

    The script exits (sys.exit(1)) if either line cannot be found or if the AMI
    tag in the reference path does not match ami_tag.
    """
    eos_path_root = '/eos/atlas/atlascerngroupdisk/data-art/grid-input/WorkflowReferences/'

    # Copied file path
    # e.g. from ERROR Copied '../SimulationRun3FullSim/run_s4006/myHITS.pool.root' to '/eos/atlas/atlascerngroupdisk/proj-ascig/gitlabci/MR63410_a84345c776e93f0d7f25d00c9e91e35bcb965d09/SimulationRun3FullSimChecks'
    copied_file_match = re.search(r'^ERROR Copied.*', text, flags=re.MULTILINE)
    if not copied_file_match:
        print("FATAL: Could not find matching copied file")
        sys.exit(1)
    # Quotes may be present as the HTML entity &#x27;. Decode it rather than
    # stripping its characters: str.strip() treats its argument as a set of
    # characters and would also remove a genuine trailing '2', '7' or 'x'.
    copied_line = html.unescape(copied_file_match.group())
    # Split on the standalone word 'to' (the destination is the last part), so
    # that a 'to' inside a file name (e.g. 'photon') cannot break the parsing.
    copied_line_parts = re.split(r'\s+to\s+', copied_line)
    if len(copied_line_parts) < 2:
        print("FATAL: Could not find matching copied file")
        sys.exit(1)
    copied_file_path = copied_line_parts[-1].strip().strip("'") + '/'

    # Reference file paths
    ref_file_match = re.search(r'INFO Reading the reference file from location.*', text)
    if not ref_file_match:
        print("FATAL: Could not find matching reference file")
        sys.exit(1)

    ref_file_path = ref_file_match.group().split('location')[1].strip()
    ref_path_parts = ref_file_path.split('/')
    existing_version_number = ref_path_parts[-2]
    branch = ref_path_parts[-4]
    new_version_number = 'v' + str(int(existing_version_number[1:]) + 1)
    new_version_directory = eos_path_root + branch + '/' + ami_tag + '/' + new_version_number
    old_version_directory = eos_path_root + branch + '/' + ami_tag + '/' + existing_version_number

    # Sanity checks
    ami_tag_check = ref_path_parts[-3].strip()
    if ami_tag_check != ami_tag:
        print('FATAL: Sanity check: "{}" from reference file path "{}" does not match ami tag "{}" extracted previously.'.format(ami_tag_check, ref_file_path, ami_tag))
        sys.exit(1)

    test = CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref = old_version_directory, existing_version = existing_version_number, new_version = new_version_number, new_version_directory = new_version_directory, copied_file_path = copied_file_path, diff=None, type='DiffPool')
    return test
152
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177

◆ process_digest_change()

python.update_ci_reference_files.process_digest_change ( text,
ami_tag,
mr_number,
human_readable_date,
test_name )

Definition at line 153 of file update_ci_reference_files.py.

def process_digest_change(text, ami_tag, mr_number, human_readable_date, test_name):
    """
    Build the CITest record for a failing digest/content test from its log text.

    The reference file name is taken from the "differs from the reference" line,
    e.g.
      ERROR The output 'q449_AOD_digest.txt' (>) differs from the reference 'q449_AOD_digest.ref' (<):
    and the diff is every non-empty line after that line, up to (not including)
    the next line containing 'INFO'. The test type is 'Content' when the
    reference name contains 'content.ref', and 'Digest' otherwise.

    The script exits (sys.exit(1)) if no reference file name can be found.
    """
    ref_file_match = re.search(
        r"differs from the reference (?:'|&#x27;)([^'&]+?)(?:'|&#x27;)",
        text
    )
    if not ref_file_match:
        print("FATAL: Could not find matching reference file")
        sys.exit(1)
    ref_file_path = ref_file_match.group(1)

    diff_marker = 'differs from the reference'
    remaining_lines = iter(text.split('\n'))

    # Skip everything up to and including the line which announces the diff
    for log_line in remaining_lines:
        if diff_marker in log_line:
            break

    # Record the diff, which ends with the next INFO line
    diff_lines = []
    for log_line in remaining_lines:
        if diff_marker in log_line:
            # A repeated announcement line is not part of the diff
            continue
        if 'INFO' in log_line:
            break
        if log_line:
            diff_lines.append(html.unescape(log_line))

    test_type = 'Content' if 'content.ref' in ref_file_path else 'Digest'
    # Some things aren't so relevant for digest changes, so they are left as None
    test = CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref = ref_file_path, existing_version = None, new_version = None, new_version_directory = None, copied_file_path = None, diff=diff_lines, type=test_type)
    return test
189

◆ process_log_file()

python.update_ci_reference_files.process_log_file ( url,
branch,
test_name )
So now we have a URL to a failing test.
We need to check that the test is failing for the correct reason - namely a reference file which needs updating
The information we need to collect is:
- the AMI tag of the failing tests
- the merge request number
- the location of the reference file
- the location of the copied file
- the name of the test
- the new version number
- the new version directory

Definition at line 62 of file update_ci_reference_files.py.

def process_log_file(url, branch, test_name):
    """Process the log of one failing test, given the URL of that log.

    The test must be failing for the correct reason - namely a reference file
    which needs updating - otherwise it is ignored. The information collected is:
    - the AMI tag of the failing test
    - the merge request number (and date), taken from the URL
    - the location of the reference file
    - the location of the copied file
    - the name of the test
    - the new version number
    - the new version directory
    Every matching change is appended to failing_tests[branch].
    """
    text = requests.get(url).text

    # First check that this looks like a test whose ref files need updating, bail otherwise.
    # The first pattern matches e.g. 'INFO All q442 athena steps completed successfully';
    # the second is the fallback in case that line was truncated.
    ami_tag_patterns = (
        r'All (?P<ami_tag>\w+) athena steps completed successfully',
        r'AMIConfig (?P<ami_tag>\w+)',
    )
    ami_tag = None
    for pattern in ami_tag_patterns:
        tag_match = re.search(pattern, text)
        if tag_match:
            ami_tag = tag_match.group('ami_tag')
            break

    if not ami_tag:
        print('WARNING: Did not find an AMI tag in the test "{}". Ignoring.'.format(test_name))
        return

    mr_match = re.search(r'ARDOC_TestLog_MR-(?P<mr_number>\d+)-(?P<date>\d{4}-\d{2}-\d{2}-\d{2}-\d{2})', url)
    if not mr_match:
        print('FATAL: Could not process the URL as expected. Aborting.')
        print(url)
        sys.exit(1)

    mr_number = mr_match.group('mr_number')
    date_fields = mr_match.group('date').split('-')
    human_readable_date = ':'.join(date_fields[0:3]) + " at " + ':'.join(date_fields[3:])

    # We have two types of tests; a log may report either (or both) of them
    is_digest_change = ("Your change breaks the digest in test" in text
                        or 'ERROR Your change modifies the output in test' in text)
    if is_digest_change:
        failing_tests[branch].append(process_digest_change(text, ami_tag, mr_number, human_readable_date, test_name))

    is_diffpool_change = ('ERROR Your change breaks the frozen tier0 policy in test' in text
                          or 'ERROR Your change breaks the frozen derivation policy in test' in text)
    if is_diffpool_change:
        failing_tests[branch].append(process_diffpool_change(text, ami_tag, mr_number, human_readable_date, test_name))
112

◆ strip_href()

python.update_ci_reference_files.strip_href ( href)

Definition at line 338 of file update_ci_reference_files.py.

def strip_href(href):
    """
    Return the text of a link, i.e. whatever lies between the first '>' and the
    following '<'.

    For example '<a href="/x">0 (0)</a>' gives '0 (0)'. A delimiter which is
    absent is simply not stripped, so a plain string such as '0 (0)' is returned
    unchanged (rather than losing its last character).
    """
    before, opening, after = href.partition('>')
    # Without a '>' there is no opening tag to strip: keep the whole string
    value = after if opening else before
    # partition() leaves the value intact when there is no '<'
    return value.partition('<')[0]
342

◆ strip_url()

python.update_ci_reference_files.strip_url ( href)

Definition at line 333 of file update_ci_reference_files.py.

def strip_url(href):
    """
    Return the URL of a link, i.e. whatever lies between the first pair of
    double quotation marks.

    For example '<a href="/x">0 (0)</a>' gives '/x'. A string without any
    quotation mark is returned unchanged, and a missing closing quotation mark
    keeps everything after the opening one (rather than losing the last character).
    """
    before, opening, after = href.partition('"')
    if not opening:
        # Nothing is quoted, so there is nothing to strip
        return before
    return after.partition('"')[0]
337

◆ summarise_failing_tests()

python.update_ci_reference_files.summarise_failing_tests ( check_for_duplicates = True)

Definition at line 367 of file update_ci_reference_files.py.

def summarise_failing_tests(check_for_duplicates = True):
    """
    Print a summary of the collected failing tests and return the URL of their MR.

    For DiffPool tests the new version directory must be known, and if it already
    exists the user is asked to confirm that it may be overwritten. When
    check_for_duplicates is True, two tests changing the same reference are
    treated as fatal.

    Returns:
        The merge request URL (built from the mr of the last test seen), or None
        when there are no failing tests.
    """
    print('Summary of tests which need work:')

    if not failing_tests:
        print(" -> None found. Aborting.")
        return None

    mr = None
    references_seen = set()
    for branch, tests in failing_tests.items():
        print (' * Branch: {}'.format(branch))
        for test in tests:
            print(' - ', test)

            if test.type == 'DiffPool':
                if not test.new_version_directory:
                    print('FATAL: No path to "new version" for test {} of type DiffPool.'.format(test.name))
                    sys.exit(1)

                if os.path.exists(test.new_version_directory):
                    msg = f'WARNING: The directory {test.new_version_directory} already exists. Are you sure you want to overwrite the existing references?'
                    answer = input("%s (y/N) " % msg)
                    if answer.lower() != 'y':
                        sys.exit(1)

            if test.existing_ref in references_seen:
                if check_for_duplicates:
                    print('FATAL: Found two tests which both change the same reference file: {}, which is not supported.'.format(test.existing_ref))
                    print('Consider running again in --test-run mode, to get a copy of the copy commands that could be run.')
                    print('The general advice is to take the largest file (since it will have the most events), and/or take the non-legacy one.')
                    sys.exit(1)
            else:
                references_seen.add(test.existing_ref)

            mr = test.mr

    return 'https://gitlab.cern.ch/atlas/athena/-/merge_requests/' + mr
399

◆ update_reference_files()

python.update_ci_reference_files.update_reference_files ( actually_update = True,
update_local_files = False )

Definition at line 190 of file update_ci_reference_files.py.

def _update_references_py(test):
    """Replace the existing version with the new version on the References.py lines mentioning test.tag."""
    references_path = 'Tools/WorkflowTestRunner/python/References.py'
    if debug:
        print ('Updating local References.py file with new version {} for tag {}'.format(test.new_version, test.tag))

    data = []
    line_found = False
    with open(references_path, 'r') as f:
        for line in f:
            if test.tag in line:
                if test.existing_version in line:
                    line = line.replace(test.existing_version, test.new_version)
                else:
                    print('')
                    print('** WARNING: For tag {} we were looking for existing version {}, but the line in the file is: {}'.format(test.tag, test.existing_version, line), end='')
                    print('** Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!')
                line_found = True
            data.append(line)

    if not line_found:
        print('** WARNING - no matching line was found for the AMI tag {} in References.py. Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!'.format(test.tag))

    with open(references_path, 'w') as f:
        f.writelines(data)


def _update_digest_file(test):
    """Apply the '<' (old) / '>' (new) line pairs of test.diff to the local digest reference file."""
    digest_path = 'Tools/PROCTools/data/' + test.existing_ref
    digest_old = [line for line in test.diff if line.startswith('<')]
    digest_new = [line for line in test.diff if line.startswith('>')]
    # Only positions which have both an old and a new line can be applied
    n_diff_lines = min(len(digest_old), len(digest_new))

    data = []
    diff_line = 0  # We will use this to keep track of which line in the diff we are on
    with open(digest_path, 'r') as f:
        for line in f:
            split_curr_line = line.split()
            # Keep blank lines and the header line untouched
            if not split_curr_line or split_curr_line[0] == 'run':
                data.append(line)
                continue

            # So, we expect first two numbers to be run/event respectively
            if len(split_curr_line) < 2 or (not split_curr_line[0].isnumeric()) or (not split_curr_line[1].isnumeric()):
                print('FATAL: Found a line in current digest which does not start with run/event numbers: {}'.format(line))
                sys.exit(1)

            if diff_line >= n_diff_lines:
                # No (usable) diff, so there is nothing to replace
                data.append(line)
                continue

            # Drop the leading '<' / '>' marker of the diff lines
            split_old_diff_line = digest_old[diff_line].split()[1:]
            split_new_diff_line = digest_new[diff_line].split()[1:]

            # Let's check to see if the run/event numbers match
            if split_curr_line[:2] == split_old_diff_line[:2]:
                # Okay so run/event numbers match. Let's just double-check it wasn't already updated
                if split_curr_line != split_old_diff_line:
                    print('FATAL: It seems like this line was already changed.')
                    # Report the old diff line itself; the test object is never
                    # given an 'old_diff_lines' attribute in this file.
                    print('Line we expected: {}'.format(digest_old[diff_line]))
                    print('Line we got : {}'.format(line))
                    sys.exit(1)

                # Check if the new run/event numbers match
                if split_curr_line[:2] == split_new_diff_line[:2]:
                    # Replace the existing line with the new one, making sure we right align within 12 characters
                    data.append("".join(["{:>12}".format(x) for x in split_new_diff_line]) + '\n')
                    if (diff_line + 1) < n_diff_lines:
                        diff_line += 1
                    continue

            # Otherwise, we just keep the existing line
            data.append(line)

    print(' -> Updating PROCTools digest file {}'.format(test.existing_ref))
    with open(digest_path, 'w') as f:
        f.writelines(data)


def update_reference_files(actually_update=True, update_local_files=False):
    """
    Update the reference files of every test collected in failing_tests.

    - DiffPool tests: the new references are created via create_dir_and_copy_refs
      and, if update_local_files is set, the version in References.py is bumped.
    - Digest tests (only if update_local_files): the local digest reference file
      is rewritten using the diff from the test log.
    - Content tests (only if update_local_files): the diff from the test log is
      applied to the local reference file with 'patch'.

    Args:
        actually_update: if True, the DiffPool copy commands are executed. If
            False they are only collected.
        update_local_files: if True, local files in the repository are modified.

    Returns:
        The de-duplicated list of copy commands which would have been executed
        (empty when actually_update is True).
    """
    print()
    print('Updating reference files')
    print('========================')
    commands = []
    for branch, tests in failing_tests.items():
        for test in tests:
            print('Processing test: {} on branch {}'.format(test.name, branch))
            if test.type == 'DiffPool':
                print(' * This is a DiffPool test, and currently has version {} of {}. Will update References.py with new version.'.format(test.existing_version, test.tag))
                if actually_update:
                    print(' -> The new version is: {}. Creating directory and copying files on EOS now.'.format(test.new_version))
                    create_dir_and_copy_refs(test, True)
                else:
                    # We will print these later, so we can sanity check them when in test mode
                    commands.extend(create_dir_and_copy_refs(test, False))
                    # Remove any duplicates, whilst preserving the order
                    commands = list(dict.fromkeys(commands))

                # Now, update local References.py file
                if update_local_files:
                    _update_references_py(test)
            elif test.type == 'Digest' and update_local_files:
                print(' * This is a Digest test. Need to update reference file {}.'.format(test.existing_ref))
                _update_digest_file(test)
            elif test.type == 'Content' and update_local_files:
                print(' * This is a Content test. Need to update reference file {}.'.format(test.existing_ref))
                subprocess.run(f'patch --quiet Tools/PROCTools/data/{test.existing_ref}',
                               input='\n'.join(test.diff)+'\n',
                               text=True, shell=True, check=True)

    return commands
289
290

Variable Documentation

◆ action

python.update_ci_reference_files.action

Definition at line 404 of file update_ci_reference_files.py.

◆ args

python.update_ci_reference_files.args = parser.parse_args()

Definition at line 405 of file update_ci_reference_files.py.

◆ author

python.update_ci_reference_files.author = mr.author['username']

Definition at line 427 of file update_ci_reference_files.py.

◆ commands

python.update_ci_reference_files.commands = update_reference_files(not args.test_run, update_local_files)

Definition at line 447 of file update_ci_reference_files.py.

◆ debug

bool python.update_ci_reference_files.debug = False

Definition at line 60 of file update_ci_reference_files.py.

◆ dirs_created

list python.update_ci_reference_files.dirs_created = []

Definition at line 59 of file update_ci_reference_files.py.

◆ failing_tests

python.update_ci_reference_files.failing_tests = defaultdict(list)

Definition at line 58 of file update_ci_reference_files.py.

◆ gl_project

python.update_ci_reference_files.gl_project = gitlab.Gitlab("https://gitlab.cern.ch").projects.get("atlas/athena")

Definition at line 425 of file update_ci_reference_files.py.

◆ help

python.update_ci_reference_files.help

Definition at line 403 of file update_ci_reference_files.py.

◆ local_branch

str python.update_ci_reference_files.local_branch = f'mr-{mr.iid}'

Definition at line 429 of file update_ci_reference_files.py.

◆ mr

python.update_ci_reference_files.mr = gl_project.mergerequests.get(mr_url.split('/')[-1])

Definition at line 426 of file update_ci_reference_files.py.

◆ mr_url

python.update_ci_reference_files.mr_url = summarise_failing_tests(not args.test_run)

Definition at line 419 of file update_ci_reference_files.py.

◆ msg

str python.update_ci_reference_files.msg = 'Would you like to (locally) update digest ref files and/or versions in References.py?'

Definition at line 438 of file update_ci_reference_files.py.

◆ not_in_athena_dir

python.update_ci_reference_files.not_in_athena_dir = subprocess.call("git rev-parse --is-inside-work-tree", shell=True)

Definition at line 441 of file update_ci_reference_files.py.

◆ parser

python.update_ci_reference_files.parser
Initial value:
1= argparse.ArgumentParser(description=__doc__,
2 formatter_class=argparse.RawDescriptionHelpFormatter)

Definition at line 401 of file update_ci_reference_files.py.

◆ remote

str python.update_ci_reference_files.remote = f'https://:@gitlab.cern.ch:8443/{author}/athena.git'

Definition at line 428 of file update_ci_reference_files.py.

◆ update_local_files

bool python.update_ci_reference_files.update_local_files = False

Definition at line 439 of file update_ci_reference_files.py.