"""Updates reference files for a given MR, as well as related files (digest ref files, References.py).

This script should be run in the root directory of the athena repository,
and you should pass in the URL of the "CI Builds Summary" page for the MR you are interested in,
i.e. the link that you get from the MR under "Full details available on <this CI monitor view>".

So, for example, if you are interested in MR 63410, you would run this script as follows:
Tools/PROCTools/scripts/update_ci_reference_files.py https://bigpanda.cern.ch/ciview/?rel=MR-63410-2023-10-09-12-27

Running with --test-run will modify local files (so you can test that the changes make sense), and will also print out the commands which would have been executed. Nothing remote is changed!
This is a good way to check that the proposed changes look reasonable before actually making them in earnest.
"""
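# A typical dry run (illustrative only; both arguments are defined by the
# argparse setup at the bottom of this script):
#   Tools/PROCTools/scripts/update_ci_reference_files.py --test-run \
#       'https://bigpanda.cern.ch/ciview/?rel=MR-63410-2023-10-09-12-27'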
from collections import defaultdict

import argparse
import os
import re
import subprocess
import sys

try:
    import gitlab
    import requests
except ImportError:
    print('FATAL: this script needs the gitlab and requests modules. Either install them yourself, or run "lsetup gitlab"')
    sys.exit(1)
class CITest:
    def __init__(self, name, tag, mr, date, existing_ref, existing_version, new_version, new_version_directory, copied_file_path, digest_old, digest_new, type):
        self.name = name
        self.tag = tag
        self.mr = mr
        self.date = date
        self.existing_ref = existing_ref
        self.existing_version = existing_version
        self.new_version = new_version
        self.new_version_directory = new_version_directory
        self.copied_file_path = copied_file_path
        self.digest_old = digest_old
        self.digest_new = digest_new
        self.type = type

    def __repr__(self):
        return f'<CI Test: {self.name} tag: {self.tag} MR: {self.mr} date: {self.date} type: {self.type}>'

    def __str__(self):
        extra = ''
        if self.type == 'DiffPool':
            extra = f' Data file change : {self.existing_version} -> {self.new_version}'
        elif self.type == 'Digest':
            extra = f' Digest change: {self.existing_ref}'
        return f'{self.name}:{self.tag} MR: {self.mr}' + extra
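# Failing tests that still need reference updates, grouped by release branch
# (the branch is extracted from the reference file path of each failing test).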
failing_tests = defaultdict(list)
"""So now we have a URL to a failing test.
We need to check that the test is failing for the correct reason - namely a reference file which needs updating.
The information we need to collect is:
- the AMI tag of the failing test
- the merge request number
- the location of the reference file
- the location of the copied file
- the name of the test
- the new version number
- the new version directory
"""
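# Fetch the test log and scan it with the regular expressions below: first for
# the AMI tag, then for the MR number and date, then for the reference file paths.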
page = requests.get(url)
text = page.text

test_match = re.search(r'All (?P<ami_tag>\w+) athena steps completed successfully', text)
ami_tag = test_match.group('ami_tag') if test_match else None
if not ami_tag:
    # Fall back to the AMIConfig line if the summary line was not found.
    match_attempt_2 = re.search(r'AMIConfig (?P<ami_tag>\w+)', text)
    if match_attempt_2:
        ami_tag = match_attempt_2.group('ami_tag')
    else:
        print('WARNING: Did not find an AMI tag in the test "{}". Ignoring.'.format(test_name))
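# The MR number and the build timestamp are encoded in the log URL itself.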
mr_match = re.search(r'NICOS_TestLog_MR-(?P<mr_number>\d+)-(?P<date>\d{4}-\d{2}-\d{2}-\d{2}-\d{2})', url)
if not mr_match:
    print('FATAL: Could not process the URL as expected. Aborting.')
    sys.exit(1)
mr_number = mr_match.group('mr_number')
date = mr_match.group('date')
human_readable_date = '-'.join(date.split('-')[0:3]) + " at " + ':'.join(date.split('-')[3:])
if ('ERROR Your change breaks the frozen tier0 policy in test' in text
        or 'ERROR Your change breaks the frozen derivation policy in test' in text):
    eos_path_root = '/eos/atlas/atlascerngroupdisk/data-art/grid-input/WorkflowReferences/'

    copied_file_match = re.search(r'^ERROR Copied.*', text, flags=re.MULTILINE)
    if not copied_file_match:
        print("FATAL: Could not find matching copied file")
        sys.exit(1)
    copied_file_path = copied_file_match.group().split('to')[1].strip().strip("'") + '/'
    ref_file_match = re.search(r'INFO Reading the reference file from location.*', text)
    if not ref_file_match:
        print("FATAL: Could not find matching reference file")
        sys.exit(1)
    ref_file_path = ref_file_match.group().split('location')[1].strip()
    existing_version_number = ref_file_path.split('/')[-2]
    branch = ref_file_path.split('/')[-4]
    new_version_number = 'v' + str(int(existing_version_number[1:]) + 1)
    new_version_directory = eos_path_root + branch + '/' + ami_tag + '/' + new_version_number
    old_version_directory = eos_path_root + branch + '/' + ami_tag + '/' + existing_version_number
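    # For illustration (hypothetical path): a reference read from
    #   /eos/.../WorkflowReferences/main/q442/v2/myAOD.pool.root
    # gives branch 'main', AMI tag 'q442', existing version 'v2' and new version 'v3',
    # so the new directory becomes .../WorkflowReferences/main/q442/v3.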
    copied_file_match = re.search(r'^ERROR Copied.*', text, flags=re.MULTILINE)
    if not copied_file_match:
        print("FATAL: Could not find matching copied file")
        sys.exit(1)

    ami_tag_check = ref_file_path.split('/')[-3].strip()
    if ami_tag_check != ami_tag:
        print('FATAL: Sanity check: "{}" from reference file path "{}" does not match ami tag "{}" extracted previously.'.format(ami_tag_check, ref_file_path, ami_tag))
        sys.exit(1)
    test = CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref=old_version_directory, existing_version=existing_version_number, new_version=new_version_number, new_version_directory=new_version_directory, copied_file_path=copied_file_path, digest_old=None, digest_new=None, type='DiffPool')
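# A digest failure carries no new DiffPool version; only the changed lines of the
# digest reference file kept in Tools/PROCTools/data need to be collected.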
elif "Your change breaks the digest in test" in text:
    existing_version_number = None
    new_version_directory = None
    copied_file_path = None
    new_version_number = None
    ref_file_match = re.search(r'(.*differs from the reference \')(.*)(\')', text)
    if not ref_file_match:
        print("FATAL: Could not find matching reference file")
        sys.exit(1)
    ref_file_path = ref_file_match.groups()[1]
    # Collect the diff printed in the log: '<' lines are the old reference values,
    # '>' lines are the new ones.
    old_diff_lines = []
    new_diff_lines = []
    for line in text.split('\n'):
        if 'differs from the reference' in line:
            continue
        if line.startswith('<'):
            old_diff_lines.append(line)
        elif line.startswith('>'):
            new_diff_lines.append(line)
    test = CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref=ref_file_path, existing_version=existing_version_number, new_version=new_version_number, new_version_directory=new_version_directory, copied_file_path=copied_file_path, digest_old=old_diff_lines, digest_new=new_diff_lines, type='Digest')
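# Apply the collected updates: bump the version in References.py for DiffPool
# tests, patch the digest reference files, and build the EOS copy commands.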
print('Updating reference files')
print('========================')
for branch, tests in failing_tests.items():
    for test in tests:
        print('Processing test: {} on branch {}'.format(test.name, branch))
        if test.type == 'DiffPool':
            print(' * This is a DiffPool test, and currently has version {} of {}. Will update References.py with new version.'.format(test.existing_version, test.tag))
            print(' -> The new version is: {}. Creating directory and copying files on EOS now.'.format(test.new_version))
            # Remove duplicate commands while preserving their order.
            commands = list(dict.fromkeys(commands))
            if update_local_files:
                print('Updating local References.py file with new version {} for tag {}'.format(test.new_version, test.tag))
                with open('Tools/WorkflowTestRunner/python/References.py', 'r') as f:
                    lines = f.readlines()
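                # References.py maps each AMI tag to the reference version used by the
                # workflow tests; only the line for this test's tag should change.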
                for i, line in enumerate(lines):
                    if test.tag not in line:
                        continue
                    if test.existing_version in line:
                        lines[i] = line.replace(test.existing_version, test.new_version)
                    else:
                        print('** WARNING: For tag {} we were looking for existing version {}, but the line in the file is: {}'.format(test.tag, test.existing_version, line), end='')
                        print('** Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!')
                    break
                else:
                    print('** WARNING - no matching line was found for the AMI tag {} in References.py. Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!'.format(test.tag))
                with open('Tools/WorkflowTestRunner/python/References.py', 'w') as f:
                    f.writelines(lines)
        elif test.type == 'Digest' and update_local_files:
            print(' * This is a Digest test. Need to update reference file {}.'.format(test.existing_ref))
            with open('Tools/PROCTools/data/' + test.existing_ref, 'r') as f:
                lines = f.readlines()
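            # Walk through the existing digest: lines whose run/event numbers match an
            # old ('<') diff line are checked and then replaced by the corresponding
            # new ('>') line; everything else is kept unchanged.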
            data = []
            diff_line = 0
            for current_line, line in enumerate(lines):
                split_curr_line = line.split()
                if (split_curr_line[0] == 'run'):
                    # Keep the digest header line unchanged.
                    data.append(line)
                    continue
                if (not split_curr_line[0].isnumeric()) or (not split_curr_line[1].isnumeric()):
                    print('FATAL: Found a line in current digest which does not start with run/event numbers: {}'.format(line))
                    sys.exit(1)
                # Drop the leading '<'/'>' marker from the diff lines.
                split_old_diff_line = test.digest_old[diff_line].split()
                split_old_diff_line.pop(0)
                split_new_diff_line = test.digest_new[diff_line].split()
                split_new_diff_line.pop(0)

                if split_curr_line[0] == split_old_diff_line[0] and split_curr_line[1] == split_old_diff_line[1]:
                    if split_curr_line != split_old_diff_line:
                        print('FATAL: It seems like this line was already changed.')
                        print('Line we expected: {}'.format(test.digest_old[diff_line]))
                        sys.exit(1)
                if split_curr_line[0] == split_new_diff_line[0] and split_curr_line[1] == split_new_diff_line[1]:
                    data.append("".join(["{:>12}".format(x) for x in split_new_diff_line]) + '\n')
                    if ((diff_line + 1) < len(test.digest_old)):
                        diff_line += 1
                else:
                    data.append(line)
            print(' -> Updating PROCTools digest file {}'.format(test.existing_ref))
            with open('Tools/PROCTools/data/' + test.existing_ref, 'w') as f:
                f.writelines(data)
"""If called with actually_update=False, this function will return a list of commands which would have been executed."""
if test.new_version_directory not in dirs_created:
    commands.append("mkdir " + test.new_version_directory)
    dirs_created.append(test.new_version_directory)
commands.append("cp " + test.copied_file_path + "* " + test.new_version_directory + "/")
commands.append("cp -n " + test.existing_ref + "/* " + test.new_version_directory + "/")
print(' -> Copying files from {} to {}'.format(test.copied_file_path, test.new_version_directory))
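# With actually_update=False the resulting command list looks like this
# (hypothetical paths, for illustration only):
#   ['mkdir /eos/.../WorkflowReferences/main/q442/v3',
#    'cp /path/to/ci/run/dir/* /eos/.../WorkflowReferences/main/q442/v3/',
#    'cp -n /eos/.../WorkflowReferences/main/q442/v2/* /eos/.../WorkflowReferences/main/q442/v3/']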
try:
    for command in commands:
        try:
            subprocess.call(command, shell=True)
        except Exception as e:
            print('Command failed due to:', e)
            print('Do you have EOS available on this machine?')
except Exception as e:
    print('FATAL: Unable to copy files due to:', e)
f = open(test.new_version_directory + '/info.txt', 'w')
f.write('Merge URL: https://gitlab.cern.ch/atlas/athena/-/merge_requests/{}\n'.format(test.mr))
f.write('Date: {}\n'.format(test.date))
f.write('AMI: {}\n'.format(test.tag))
f.write('Test name: {}\n'.format(test.name))
f.write('Files copied from: {}\n'.format(test.copied_file_path))
f.close()
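# info.txt records the provenance of the new reference directory: the MR, the
# date, the AMI tag, the test name and where the files were copied from.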
if ('ERROR' in row[0]):

def strip_url(href):
    url = href[href.find('"') + 1:]
    url = url[:url.find('"')]
    return url

value = href[href.find('>') + 1:]
value = value[:value.find('<')]
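# The summary tables are fetched as JSON by appending '&json' to the page URL
# and sending an 'Accept: application/json' header; the rows live under 'rows_s'.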
test_error_counts = int(test_counts.split(' ')[0])
if test_error_counts > 0:
    project_url = 'https://bigpanda.cern.ch' + strip_url(project[11])
    headers = {'Accept': 'application/json'}
    r = requests.get(project_url + '&json', headers=headers)
    data = r.json()["rows_s"]
headers = {'Accept': 'application/json'}
r = requests.get(url + '&json', headers=headers)
data = r.json()["rows_s"]

for project in data[1:]:
print('Summary of tests which need work:')

if not failing_tests:
    print(" -> None found. Aborting.")
reference_folders = []
for branch, tests in failing_tests.items():
    print(' * Branch: {}'.format(branch))
    for test in tests:
        if test.type == 'DiffPool':
            if not test.new_version_directory:
                print('FATAL: No path to "new version" for test {} of type DiffPool.'.format(test.name))
                sys.exit(1)
            if os.path.exists(test.new_version_directory):
                msg = f'WARNING: The directory {test.new_version_directory} already exists. Are you sure you want to overwrite the existing references?'
                if input("%s (y/N) " % msg).lower() != 'y':
                    sys.exit(1)
            if test.existing_ref not in reference_folders:
                reference_folders.append(test.existing_ref)
            elif check_for_duplicates:
                print('FATAL: Found two tests which both change the same reference file: {}, which is not supported.'.format(test.existing_ref))
                print('Consider running again in --test-run mode, to get a list of the copy commands that could be run.')
                print('The general advice is to take the largest file (since it will have the most events), and/or take the non-legacy one.')
                sys.exit(1)
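# Build the GitLab merge-request URL for a given MR number.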
return 'https://gitlab.cern.ch/atlas/athena/-/merge_requests/' + mr
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('url', help='URL to CITest (put in quotes)')
    parser.add_argument('--test-run', help='Update local text files, but do not actually touch EOS.', action='store_true')
    args = parser.parse_args()
    print('Update reference files for URL: {}'.format(args.url))

    if not args.url.startswith(('http://', 'https://')):
        print('Invalid URL - should start with http:// or https://')
        sys.exit(1)
    if args.test_run:
        print(' -> Running in test mode so will not touch EOS, but will only modify files locally (these changes can easily be reverted with "git checkout" etc).')

    print('========================')

    print('========================')
    gl_project = gitlab.Gitlab("https://gitlab.cern.ch").projects.get("atlas/athena")
    mr = gl_project.mergerequests.get(mr_url.split('/')[-1])
    author = mr.author['username']
    remote = f'https://:@gitlab.cern.ch:8443/{author}/athena.git'
    local_branch = f'mr-{mr.iid}'
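    # The push target is the MR author's fork; a local branch named mr-<iid>
    # tracks the MR's source branch while the reference updates are committed.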
    print("The next step is to update the MR with the new content, i.e. the References.py file and the digest files.")
    print(" IMPORTANT: before you do this, you must first make sure that the local repository is on the same branch as the MR by doing:")
    print(f" $ git fetch --no-tags {remote} {mr.source_branch}:{local_branch}")
    print(f" $ git switch {local_branch}")
    print(" $ git rebase upstream/main")
    msg = 'Would you like to (locally) update digest ref files and/or versions in References.py?'
    update_local_files = False
    if input("%s (y/N) " % msg).lower() == 'y':
        not_in_athena_dir = subprocess.call("git rev-parse --is-inside-work-tree", shell=True)
        if not_in_athena_dir:
            print('FATAL: You must run this script from within the athena directory.')
            sys.exit(1)
        update_local_files = True
    if commands and args.test_run:
        print(' -> In test-run mode. In normal mode we would also have executed:')
        for command in commands:
            print(command)
    if not args.test_run:
        print("Finished! Before pushing, you might want to manually trigger an EOS to cvmfs copy here: https://atlas-jenkins.cern.ch/view/all/job/ART_data_eos2cvmfs/")
        print("Then commit your changes and (force) push the updated branch to the author's remote:")
        print(" $ git commit")
        print(f" $ git push [-f] {remote} {local_branch}:{mr.source_branch}")