ATLAS Offline Software
Loading...
Searching...
No Matches
update_ci_reference_files.py
Go to the documentation of this file.
1#!/bin/env python3
2# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3
4
5"""Updates reference files for a given MR, as well as related files (digest ref files, References.py)
6
7This script should be run in the root directory of the athena repository,
8and you should pass in the URL of "CI Builds Summary" page for the MR you are interested in.
9i.e. the link that you get from the MR under "Full details available on <this CI monitor view>"
10
11So, for example, if you are interested in MR 63410, you would run this script as follows:
12Tools/PROCTools/scripts/update_ci_reference_files.py https://bigpanda.cern.ch/ciview/?rel=MR-63410-2023-10-09-12-27
13
14Running with --test-run will modify local files (so you can test that the changes make sense), and will also print out the commands which would have been executed. Nothing remote is changed!
15This is a good way to check that the proposed changes look rational before actually making them in earnest.
16"""
17
18from collections import defaultdict
19import os
20import sys
21import subprocess
22import re
23import argparse
24try:
25 import gitlab
26 import requests
27except ImportError:
28 print('FATAL: this script needs the gitlab and requests modules. Either install them yourself, or run "lsetup gitlab"')
29
class CITest:
    """Plain record describing one failing CI test whose references need updating.

    Attributes mirror the constructor arguments one-to-one:
      name, tag, mr, date   -- test name, AMI tag, merge request number and date string
      existing_ref          -- current reference (a directory for DiffPool tests, a digest file name for Digest tests)
      existing_version      -- current reference version (None for Digest tests)
      new_version           -- version the references will be bumped to (None for Digest tests)
      new_version_directory -- directory that will hold the new references (None for Digest tests)
      copied_file_path      -- location the CI copied the new output files to (None for Digest tests)
      digest_old/digest_new -- removed/added digest diff lines (None for DiffPool tests)
      type                  -- 'DiffPool' or 'Digest'
    """

    def __init__(self, name, tag, mr, date, existing_ref, existing_version, new_version, new_version_directory, copied_file_path, digest_old, digest_new, type):
        # Which test this is, and which MR / CI run it belongs to
        self.name, self.tag = name, tag
        self.mr, self.date = mr, date
        # Where the references live now, and where the new ones will go
        self.existing_ref, self.existing_version = existing_ref, existing_version
        self.new_version, self.new_version_directory = new_version, new_version_directory
        self.copied_file_path = copied_file_path
        # Digest diff content (only filled for Digest tests)
        self.digest_old, self.digest_new = digest_old, digest_new
        self.type = type

    def __repr__(self):
        return f'<CI Test: {self.name} tag: {self.tag} MR: {self.mr} date: {self.date} type: {self.type}>'

    def __str__(self):
        # Common prefix, followed by a type-specific description of what changes
        summary = f'{self.name}:{self.tag} MR: {self.mr}'
        if self.type == 'DiffPool':
            return summary + f' Data file change : {self.existing_version} -> {self.new_version}'
        if self.type == 'Digest':
            return summary + f' Digest change: {self.existing_ref}'
        return summary
55
# Module-level state shared by the functions in this script.
failing_tests = defaultdict(list) # Key is branch, value is list of CITest objects (filled by process_log_file)
dirs_created=[] #Used later to ensure we don't try to create the same directory twice (see create_dir_and_copy_refs)
debug = False # When True, update_reference_files prints an extra message before editing References.py
59
def process_log_file(url, branch, test_name):
    """Download the log of a failing test and record which references it needs updated.

    The log is only of interest if the test failed because a reference is out of date.
    From the log text and the URL we collect:
    - the AMI tag of the failing test
    - the merge request number and the date of the CI run
    and then hand over to process_digest_change / process_diffpool_change, which extract
    the reference locations and version information. Every resulting CITest is appended
    to failing_tests[branch].

    Returns None. Exits the script if the URL does not have the expected form.
    """
    log_text = requests.get(url).text

    # Look for the AMI tag. The first pattern matches e.g.
    # INFO All q442 athena steps completed successfully
    # and the second is the fallback in case that line was truncated away.
    ami_tag = None
    tag_patterns = (
        r'All (?P<ami_tag>\w+) athena steps completed successfully',
        r'AMIConfig (?P<ami_tag>\w+)',
    )
    for pattern in tag_patterns:
        tag_match = re.search(pattern, log_text)
        if tag_match:
            ami_tag = tag_match.group('ami_tag')
            break

    if ami_tag is None:
        print('WARNING: Did not find an AMI tag in the test "{}". Ignoring.'.format(test_name))
        return

    # MR number and timestamp are encoded in the log URL itself
    url_match = re.search(r'ARDOC_TestLog_MR-(?P<mr_number>\d+)-(?P<date>\d{4}-\d{2}-\d{2}-\d{2}-\d{2})', url)
    if url_match is None:
        print('FATAL: Could not process the URL as expected. Aborting.')
        print(url)
        sys.exit(1)

    mr_number = url_match.group('mr_number')
    date_fields = url_match.group('date').split('-')
    human_readable_date = ':'.join(date_fields[0:3]) + " at " + ':'.join(date_fields[3:])

    # A single log can report both kinds of change, so the two checks are independent
    digest_changed = "Your change breaks the digest in test" in log_text
    if digest_changed:
        failing_tests[branch].append(process_digest_change(log_text, ami_tag, mr_number, human_readable_date, test_name))

    diffpool_changed = (
        'ERROR Your change breaks the frozen tier0 policy in test' in log_text
        or 'ERROR Your change breaks the frozen derivation policy in test' in log_text
    )
    if diffpool_changed:
        failing_tests[branch].append(process_diffpool_change(log_text, ami_tag, mr_number, human_readable_date, test_name))
110
def process_diffpool_change(text, ami_tag, mr_number, human_readable_date, test_name):
    """Build the CITest describing a DiffPool (frozen policy) reference change.

    Arguments:
      text                -- full text of the test log (quotes may be HTML-escaped as &#x27;)
      ami_tag             -- AMI tag extracted earlier from the same log
      mr_number           -- merge request number
      human_readable_date -- date string of the CI run
      test_name           -- name of the failing test

    Returns a CITest of type 'DiffPool' whose new version is the existing version plus one.
    Exits the script with a FATAL message if the log does not contain the expected
    "ERROR Copied" / "Reading the reference file" lines, if the reference path does not
    have the .../<branch>/<ami tag>/<version>/<file> layout, or if the AMI tag in the
    path disagrees with ami_tag.
    """
    eos_path_root = '/eos/atlas/atlascerngroupdisk/data-art/grid-input/WorkflowReferences/'
    quote = r"(?:'|&#x27;)"  # a quote character, either literal or HTML-escaped

    # Copied file path
    # e.g. from ERROR Copied '../SimulationRun3FullSim/run_s4006/myHITS.pool.root' to '/eos/atlas/atlascerngroupdisk/proj-ascig/gitlabci/MR63410_a84345c776e93f0d7f25d00c9e91e35bcb965d09/SimulationRun3FullSimChecks'
    copied_line_match = re.search(r'^ERROR Copied.*', text, flags=re.MULTILINE)
    destination_match = None
    if copied_line_match:
        # Take what follows the standalone word "to", minus the surrounding quotes.
        # (Splitting on the bare substring 'to' breaks for paths which contain "to", and
        # str.strip("&#x27;") strips a *set* of characters so could eat the end of the path.)
        destination_match = re.search(
            r"\bto\s+" + quote + r"?(?P<destination>.+?)" + quote + r"?\s*$",
            copied_line_match.group(),
        )
    if not destination_match:
        print("FATAL: Could not find matching copied file")
        sys.exit(1)
    copied_file_path = destination_match.group('destination') + '/'

    # Reference file paths
    ref_file_match = re.search(r'INFO Reading the reference file from location.*', text)
    if not ref_file_match:
        print("FATAL: Could not find matching reference file")
        sys.exit(1)

    # The path is expected to end with <branch>/<ami tag>/<version>/<file name>
    ref_file_path = ref_file_match.group().partition('location')[2].strip()
    path_parts = ref_file_path.split('/')
    if len(path_parts) < 4:
        print('FATAL: Could not extract branch, AMI tag and version from reference file path "{}".'.format(ref_file_path))
        sys.exit(1)
    branch = path_parts[-4]
    ami_tag_check = path_parts[-3].strip()
    existing_version_number = path_parts[-2]

    # Sanity checks
    if ami_tag_check != ami_tag:
        print('FATAL: Sanity check: "{}" from reference file path "{}" does not match ami tag "{}" extracted previously.'.format(ami_tag_check, ref_file_path, ami_tag))
        sys.exit(1)

    # Versions look like "v7"; the new version is simply the next integer
    try:
        new_version_number = 'v' + str(int(existing_version_number[1:]) + 1)
    except ValueError:
        print('FATAL: Could not interpret "{}" from reference file path "{}" as a version number.'.format(existing_version_number, ref_file_path))
        sys.exit(1)

    tag_directory = eos_path_root + branch + '/' + ami_tag + '/'
    new_version_directory = tag_directory + new_version_number
    old_version_directory = tag_directory + existing_version_number

    test = CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref = old_version_directory, existing_version = existing_version_number, new_version = new_version_number, new_version_directory = new_version_directory, copied_file_path = copied_file_path, digest_old=None, digest_new=None, type='DiffPool')
    return test
150
def process_digest_change(text, ami_tag, mr_number, human_readable_date, test_name):
    """Build the CITest describing a digest reference change.

    The name of the digest reference file is taken from the log line
      ... differs from the reference 'q447_AOD_digest.ref' (<):
    and the diff itself is collected from the lines which follow it: lines starting
    with '&lt;' are the old content, lines starting with '&gt;' the new content.
    Collection stops at the next line containing 'INFO'.

    Returns a CITest of type 'Digest'. Exits the script if no reference name is found.
    """
    reference_match = re.search(
        r"differs from the reference (?:'|&#x27;)([^'&]+?)(?:'|&#x27;)",
        text
    )
    if reference_match is None:
        print("FATAL: Could not find matching reference file")
        sys.exit(1)
    digest_reference = reference_match.group(1)

    removed_lines, added_lines = [], []
    inside_diff = False  # Flips to True once the header line of the diff has been seen
    # Diff starts with e.g.
    # ERROR The output 'q449_AOD_digest.txt' (>) differs from the reference 'q449_AOD_digest.ref' (<):
    # and ends with next INFO line
    for log_line in text.split('\n'):
        if 'differs from the reference' in log_line:
            inside_diff = True
            continue
        if not inside_diff:
            continue
        if log_line.startswith('&lt;'):
            removed_lines.append(log_line)
        elif log_line.startswith('&gt;'):
            added_lines.append(log_line)
        elif 'INFO' in log_line:
            # End of the diff
            break

    # Version and directory information is only meaningful for DiffPool tests
    return CITest(
        name=test_name,
        tag=ami_tag,
        mr=mr_number,
        date=human_readable_date,
        existing_ref=digest_reference,
        existing_version=None,
        new_version=None,
        new_version_directory=None,
        copied_file_path=None,
        digest_old=removed_lines,
        digest_new=added_lines,
        type='Digest',
    )
190
def _update_references_py(test):
    """Replace the existing version of test.tag with the new version in the local References.py."""
    references_path = 'Tools/WorkflowTestRunner/python/References.py'
    if debug:
        print ('Updating local References.py file with new version {} for tag {}'.format(test.new_version, test.tag))

    data = []
    line_found = False
    with open(references_path, 'r') as f:
        for line in f:
            if test.tag in line:
                if test.existing_version in line:
                    line = line.replace(test.existing_version, test.new_version)
                else:
                    print('')
                    print('** WARNING: For tag {} we were looking for existing version {}, but the line in the file is: {}'.format(test.tag, test.existing_version, line), end='')
                    print('** Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!')
                line_found = True
            data.append(line)

    if not line_found:
        print('** WARNING - no matching line was found for the AMI tag {} in References.py. Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!'.format(test.tag))

    with open(references_path, 'w') as f:
        f.writelines(data)


def _update_digest_file(test):
    """Apply the digest diff stored in test to the local PROCTools digest reference file."""
    digest_path = 'Tools/PROCTools/data/' + test.existing_ref

    # Pair every old ("<") diff line with its new (">") counterpart, dropping the leading
    # marker column. Pairing via zip means a missing counterpart cannot cause an IndexError.
    diff_pairs = [(old.split()[1:], new.split()[1:]) for old, new in zip(test.digest_old, test.digest_new)]
    diff_line = 0  # We will use this to keep track of which line in the diff we are on

    data = []
    with open(digest_path, 'r') as f:
        for line in f:
            split_curr_line = line.split()
            if not split_curr_line or split_curr_line[0] == 'run':
                # Blank lines and the header line are passed through untouched
                data.append(line)
                continue

            # So, we expect first two numbers to be run/event respectively
            if len(split_curr_line) < 2 or not split_curr_line[0].isnumeric() or not split_curr_line[1].isnumeric():
                print('FATAL: Found a line in current digest which does not start with run/event numbers: {}'.format(line))
                sys.exit(1)

            if diff_pairs:
                split_old_diff_line, split_new_diff_line = diff_pairs[diff_line]

                # Let's check to see if the run/event numbers match
                if split_curr_line[:2] == split_old_diff_line[:2]:
                    # Okay so run/event numbers match. Let's just double-check it wasn't already updated
                    if split_curr_line != split_old_diff_line:
                        print('FATAL: It seems like this line was already changed.')
                        print('Line we expected: {}'.format(test.digest_old[diff_line]))
                        print('Line we got : {}'.format(line))
                        sys.exit(1)

                    # Check if the new run/event numbers match
                    if split_curr_line[:2] == split_new_diff_line[:2]:
                        # Replace the existing line with the new one, making sure we right align within 12 characters
                        data.append("".join(["{:>12}".format(x) for x in split_new_diff_line]) + '\n')
                        if diff_line + 1 < len(diff_pairs):
                            diff_line += 1
                        continue

            # Otherwise, we just keep the existing line
            data.append(line)

    print(' -> Updating PROCTools digest file {}'.format(test.existing_ref))
    with open(digest_path, 'w') as f:
        f.writelines(data)


def update_reference_files(actually_update=True, update_local_files=False):
    """Update the references of every test recorded in failing_tests.

    Arguments:
      actually_update    -- if True, create the new reference directories and copy the files
                            (via create_dir_and_copy_refs); if False, only collect the commands.
      update_local_files -- if True, also edit the local References.py (DiffPool tests) and
                            the local digest reference files (Digest tests).

    Returns the de-duplicated list of commands which were collected but not executed
    (so the list stays empty when actually_update is True).
    Exits the script if a digest reference file has an unexpected format or appears to
    have been updated already.
    """
    print()
    print('Updating reference files')
    print('========================')
    commands = []
    for branch, tests in failing_tests.items():
        for test in tests:
            print('Processing test: {} on branch {}'.format(test.name, branch))
            if test.type == 'DiffPool':
                print(' * This is a DiffPool test, and currently has version {} of {}. Will update References.py with new version.'.format(test.existing_version, test.tag))
                if actually_update:
                    print(' -> The new version is: {}. Creating directory and copying files on EOS now.'.format(test.new_version))
                    create_dir_and_copy_refs(test, True)
                else:
                    # We will print these later, so we can sanity check them when in test mode
                    commands.extend(create_dir_and_copy_refs(test, False))
                    # Remove any duplicates, whilst preserving the order
                    commands = list(dict.fromkeys(commands))

                # Now, update local References.py file
                if update_local_files:
                    _update_references_py(test)
            elif test.type == 'Digest' and update_local_files:
                print(' * This is a Digest test. Need to update reference file {}.'.format(test.existing_ref))
                _update_digest_file(test)
    return commands
281
282
def create_dir_and_copy_refs(test, actually_update=False):
    """Create the new reference directory for a DiffPool test and fill it.

    The command list is: an optional "mkdir -p" (skipped if this directory was already
    handled, see dirs_created), a copy of the files the CI produced, and a no-clobber
    copy of the existing references.

    If called with actually_update=False, this function will return a list of commands which would have been executed.
    If called with actually_update=True, the commands are run through the shell and an
    info.txt describing the update is written into the new directory; the same command
    list is returned. A command which cannot be started or which exits with a non-zero
    status is reported but does not stop the script; failing to write info.txt does.
    """
    import shlex  # Local import so this function does not depend on the file-level import list

    # The paths were extracted from a log file, so quote them before handing them to the shell.
    # The glob "*" has to stay outside the quotes to be expanded.
    new_directory = shlex.quote(test.new_version_directory)

    commands = []
    if test.new_version_directory not in dirs_created:
        commands.append("mkdir -p " + new_directory)
        dirs_created.append(test.new_version_directory)

    # Copy new directory first, then copy old (in case the new MR did not touch all files)
    # Important! Use no-clobber for second copy or we will overwrite the new data with old!
    commands.append("cp " + shlex.quote(test.copied_file_path) + "* " + new_directory + "/")
    commands.append("cp -n " + shlex.quote(test.existing_ref) + "/* " + new_directory + "/")

    if not actually_update:
        return commands

    print(' -> Copying files from {} to {}'.format(test.copied_file_path, test.new_version_directory))
    for command in commands:
        try:
            return_code = subprocess.call(command, shell=True)
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print('Command failed due to:', e)
            print('Do you have EOS available on this machine?')
            continue
        # subprocess.call does not raise on failure, so the exit status has to be checked.
        # Only warn: some cp versions report a non-zero status when -n skips existing files.
        if return_code != 0:
            print('WARNING: Command "{}" exited with status {}.'.format(command, return_code))
            print('Do you have EOS available on this machine?')

    try:
        with open(test.new_version_directory + '/info.txt', 'w') as f:
            f.write('Merge URL: https://gitlab.cern.ch/atlas/athena/-/merge_requests/{}\n'.format(test.mr))
            f.write('Date: {}\n'.format(test.date))
            f.write('AMI: {}\n'.format(test.tag))
            f.write('Test name: {}\n'.format(test.name))
            f.write('Files copied from: {}\n'.format(test.copied_file_path))
    except OSError as e:
        print('FATAL: Unable to copy files due to:', e)
        sys.exit(1)

    return commands
318
320 # Each list entry is one column in the table.
321 for row in data:
322 if ('ERROR' in row[0]):
323 process_log_file(strip_url(row[2]), branch = row[1], test_name=strip_href(row[2]))
324
def strip_url(href):
    """Return the text between the first and second double quote of an HTML anchor.

    For '<a href="/some/url">label</a>' this is '/some/url'.
    """
    start = href.find('"') + 1  # First character after the opening quotation mark
    end = href.find('"', start)  # Closing quotation mark
    return href[start:end]
329
def strip_href(href):
    """Return the text of an HTML anchor, i.e. what sits between the first '>' and the next '<'.

    For '<a href="/some/url">0 (0)</a>' this is '0 (0)'.
    """
    start = href.find('>') + 1  # First character after the opening tag
    end = href.find('<', start)  # Start of the closing tag
    return href[start:end]
334
336 # Each entry is one column in the table. 11th is the tests column.
337 # URL to tests page is in form:
338 # <a href="/testsview/?nightly=MR-CI-builds&rel=MR-66303-2023-10-10-19-08&ar=x86_64-centos7-gcc112-opt&proj=AthGeneration">0 (0)</a>
339 test_counts = strip_href(project[11])
340 # This is e.g. '0 (0)'
341 test_error_counts = int(test_counts.split(' ')[0])
342 if test_error_counts > 0:
343 # Okay, we have an error!
344 project_url = 'https://bigpanda.cern.ch'+strip_url(project[11])
345 headers = {'Accept': 'application/json'}
346 r = requests.get(project_url+'&json', headers=headers)
347 data = r.json()["rows_s"]
348 process_CI_Tests_json(data[1:])
349
351 headers = {'Accept': 'application/json'}
352 r = requests.get(url+'&json', headers=headers)
353 data = r.json()["rows_s"]
354 # First row is header.
355 # Currently this is: 'Release', 'Platform', 'Project', 'git branch<BR>(link to MR)', 'Job time stamp', 'git clone', 'Externals build', 'CMake config', 'Build time', 'Comp. Errors (w/warnings)', 'Test time', 'CI tests errors (w/warnings)', 'Host'
356 for project in data[1:]:
358
def summarise_failing_tests(check_for_duplicates = True):
    """Print a summary of the tests in failing_tests and sanity-check them.

    For DiffPool tests, the new version directory must be known, and the user is asked
    for confirmation if that directory already exists. If check_for_duplicates is True,
    two tests which change the same reference are treated as a fatal error.

    Returns the URL of the merge request of the last test seen, or None if there are no
    failing tests. Exits the script on a fatal error or if the user declines to overwrite.
    """
    print('Summary of tests which need work:')

    if not failing_tests:
        print(" -> None found. Aborting.")
        return None

    last_mr = None
    seen_references = []
    for branch, branch_tests in failing_tests.items():
        print (' * Branch: {}'.format(branch))
        for ci_test in branch_tests:
            print(' - ', ci_test)

            if ci_test.type == 'DiffPool':
                target_directory = ci_test.new_version_directory
                if not target_directory:
                    print('FATAL: No path to "new version" for test {} of type DiffPool.'.format(ci_test.name))
                    sys.exit(1)

                if os.path.exists(target_directory):
                    msg = f'WARNING: The directory {ci_test.new_version_directory} already exists. Are you sure you want to overwrite the existing references?'
                    answer = input("%s (y/N) " % msg)
                    if answer.lower() != 'y':
                        sys.exit(1)

            # Each reference may only be touched by a single test
            is_duplicate = ci_test.existing_ref in seen_references
            if not is_duplicate:
                seen_references.append(ci_test.existing_ref)
            elif check_for_duplicates:
                print('FATAL: Found two tests which both change the same reference file: {}, which is not supported.'.format(ci_test.existing_ref))
                print('Consider running again in --test-run mode, to get a copy of the copy commands that could be run.')
                print('The general advice is to take the largest file (since it will have the most events), and/or take the non-legacy one.')
                sys.exit(1)

            last_mr = ci_test.mr

    return 'https://gitlab.cern.ch/atlas/athena/-/merge_requests/' + last_mr
391
if __name__ == '__main__':
    # Script entry point: parse the command line, summarise the failing tests, tell the
    # user how to get onto the MR branch, then update the references.
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('url', help='URL to CITest (put in quotes))')
    parser.add_argument('--test-run',help='Update local text files, but do not actually touch EOS.', action='store_true')
    args = parser.parse_args()
    print('Update reference files for URL: {}'.format(args.url))

    # Only http(s) URLs are accepted
    if not args.url.startswith(('http://', 'https://')):
        print('invalid url - should start with http:// or https://')
        print(args.url)
        print('Aborting.')
        sys.exit(1)

    if args.test_run:
        print(' -> Running in test mode so will not touch EOS, but will only modify files locally (these changes can easily be reverted with "git checkout" etc).')

    print('========================')
    # NOTE(review): this listing has no statement at this point, yet failing_tests must be
    # filled before the summary below. Presumably the call which processes args.url belongs
    # here - confirm against the original file.
    # The duplicate check is only enforced when not in test-run mode
    mr_url = summarise_failing_tests(not args.test_run)
    if not mr_url:
        sys.exit(1)
    print('========================')

    # Retrieve MR infos:
    gl_project = gitlab.Gitlab("https://gitlab.cern.ch").projects.get("atlas/athena")
    mr = gl_project.mergerequests.get(mr_url.split('/')[-1]) # Last URL component is the MR number
    author = mr.author['username']
    remote = f'https://:@gitlab.cern.ch:8443/{author}/athena.git'
    local_branch = f'mr-{mr.iid}'

    print("The next step is to update the MR with the new content i.e. the References.py file and the digest files.")
    print(" IMPORTANT: before you do this, you must first make sure that the local repository is on same branch as the MR by doing:")
    print(f" $ git fetch --no-tags {remote} {mr.source_branch}:{local_branch}")
    print(f" $ git switch {local_branch}")
    print(" $ git rebase upstream/main") # In case there have been any changes since the MR was created
    print()

    msg = 'Would you like to (locally) update digest ref files and/or versions in References.py?'
    update_local_files = False
    if input("%s (y/N) " % msg).lower() == 'y':
        # A non-zero exit status from git means we are not inside a git work tree
        not_in_athena_dir = subprocess.call("git rev-parse --is-inside-work-tree", shell=True)
        if not_in_athena_dir:
            print('FATAL: You must run this script from within the athena directory.')
            sys.exit(1)
        update_local_files = True

    commands = update_reference_files(not args.test_run, update_local_files)

    # In test-run mode nothing was executed, so show what would have been done
    if commands and args.test_run:
        print()
        print(' -> In test-run mode. In normal mode we would also have executed:')
        for command in commands:
            print(' ', command)
    if not args.test_run:
        print()
        print("Finished! Before pushing, you might want to manually trigger an EOS to cvmfs copy here: https://atlas-jenkins.cern.ch/view/all/job/ART_data_eos2cvmfs/")
        print("Then commit your changes and (force) push the updated branch to the author's remote:")
        print(" $ git commit")
        print(f" $ git push [-f] {remote} {local_branch}:{mr.source_branch}")
void print(char *figname, TCanvas *c1)
__init__(self, name, tag, mr, date, existing_ref, existing_version, new_version, new_version_directory, copied_file_path, digest_old, digest_new, type)
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
process_digest_change(text, ami_tag, mr_number, human_readable_date, test_name)
create_dir_and_copy_refs(test, actually_update=False)
process_diffpool_change(text, ami_tag, mr_number, human_readable_date, test_name)
summarise_failing_tests(check_for_duplicates=True)
update_reference_files(actually_update=True, update_local_files=False)