"""Updates reference files for a given MR, as well as related files (digest ref files, References.py).
This script should be run in the root directory of the athena repository,
and you should pass in the URL of the "CI Builds Summary" page for the MR you are interested in,
i.e. the link that you get from the MR under "Full details available on <this CI monitor view>".
So, for example, if you are interested in MR 63410, you would run this script as follows:
Tools/PROCTools/scripts/update_ci_reference_files.py https://bigpanda.cern.ch/ciview/?rel=MR-63410-2023-10-09-12-27
Running with --test-run will modify local files (so you can test that the changes make sense), and will also print out the commands which would have been executed. Nothing remote is changed!
This is a good way to check that the proposed changes look rational before actually making them in earnest.
18from collections
import defaultdict
28 print(
'FATAL: this script needs the gitlab and requests modules. Either install them yourself, or run "lsetup gitlab"')
31 def __init__(self, name, tag, mr, date, existing_ref, existing_version, new_version, new_version_directory, copied_file_path, digest_old, digest_new, type):
46 return f
'<CI Test: {self.name} tag: {self.tag} MR: {self.mr} date: {self.date} type: {self.type}>'
50 if self.
type ==
'DiffPool':
51 extra = f
' Data file change : {self.existing_version} -> {self.new_version}'
52 elif self.
type ==
'Digest':
53 extra = f
' Digest change: {self.existing_ref}'
54 return f
'{self.name}:{self.tag} MR: {self.mr}'+extra
56failing_tests = defaultdict(list)
61 """So now we have a URL to a failing test.
62 We need to check that the test is failing for the correct reason - namely a reference file which needs updating
63 The information we need to collect is:
64 - the AMI tag of the failing tests
65 - the merge request number
66 - the location of the reference file
67 - the location of the copied file
68 - the name of the test
69 - the new version number
70 - the new version directory
72 page = requests.get(url)
77 test_match = re.search(
r'All (?P<ami_tag>\w+) athena steps completed successfully', text)
78 ami_tag = test_match.group(
'ami_tag')
if test_match
else None
83 match_attempt_2 = re.search(
r'AMIConfig (?P<ami_tag>\w+)', text)
85 ami_tag = match_attempt_2.group(
'ami_tag')
88 print(
'WARNING: Did not find an AMI tag in the test "{}". Ignoring.'.format(test_name))
91 mr_match = re.search(
r'ARDOC_TestLog_MR-(?P<mr_number>\d+)-(?P<date>\d{4}-\d{2}-\d{2}-\d{2}-\d{2})', url)
93 print(
'FATAL: Could not process the URL as expected. Aborting.')
97 mr_number = mr_match.group(
'mr_number')
98 date = mr_match.group(
'date')
99 human_readable_date =
':'.join(date.split(
'-')[0:3]) +
" at " +
':'.join(date.split(
'-')[3:])
101 if "Your change breaks the digest in test" in text:
103 failing_tests[branch].append(
process_digest_change(text, ami_tag, mr_number, human_readable_date, test_name))
105 if 'ERROR Your change breaks the frozen tier0 policy in test' in text
or 'ERROR Your change breaks the frozen derivation policy in test' in text:
107 failing_tests[branch].append(
process_diffpool_change(text, ami_tag, mr_number, human_readable_date, test_name))
112 eos_path_root =
'/eos/atlas/atlascerngroupdisk/data-art/grid-input/WorkflowReferences/'
116 copied_file_match = re.search(
r'^ERROR Copied.*', text, flags=re.MULTILINE)
117 if not copied_file_match:
118 print(
"FATAL: Could not find matching copied file")
120 copied_file_path = copied_file_match.group().
split(
'to')[1].
strip().
strip(
"'").
strip(
"'")+
'/'
123 ref_file_match = re.search(
r'INFO Reading the reference file from location.*', text)
124 if not ref_file_match:
125 print(
"FATAL: Could not find matching reference file")
128 ref_file_path = ref_file_match.group().
split(
'location')[1].
strip()
129 existing_version_number= ref_file_path.split(
'/')[-2]
130 branch = ref_file_path.split(
'/')[-4]
131 new_version_number =
'v'+str(int(existing_version_number[1:])+1)
132 new_version_directory = eos_path_root+branch+
'/'+ami_tag+
'/'+new_version_number
133 old_version_directory = eos_path_root+branch+
'/'+ami_tag+
'/'+existing_version_number
136 copied_file_match = re.search(
r'^ERROR Copied.*', text, flags=re.MULTILINE)
137 if not copied_file_match:
138 print(
"FATAL: Could not find matching copied file")
142 ami_tag_check = ref_file_path.split(
'/')[-3].
strip()
143 if ami_tag_check!=ami_tag:
144 print(
'FATAL: Sanity check: "{}" from reference file path "{}" does not match ami tag "{}" extracted previously.'.format(ami_tag_check, ref_file_path, ami_tag))
148 test =
CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref = old_version_directory, existing_version = existing_version_number, new_version = new_version_number, new_version_directory = new_version_directory, copied_file_path = copied_file_path, digest_old=
None, digest_new=
None, type=
'DiffPool')
153 existing_version_number =
None
154 new_version_directory =
None
155 copied_file_path =
None
156 new_version_number=
None
159 ref_file_match = re.search(
160 r"differs from the reference (?:'|')([^'&]+?)(?:'|')",
163 if not ref_file_match:
164 print(
"FATAL: Could not find matching reference file")
166 ref_file_path = ref_file_match.group(1)
175 for line
in text.split(
'\n'):
176 if 'differs from the reference' in line:
180 if line.startswith(
'<'):
181 old_diff_lines.append(line)
182 elif line.startswith(
'>'):
183 new_diff_lines.append(line)
188 test =
CITest(name=test_name, tag=ami_tag, mr=mr_number, date=human_readable_date, existing_ref = ref_file_path, existing_version = existing_version_number, new_version = new_version_number, new_version_directory = new_version_directory, copied_file_path = copied_file_path, digest_old=old_diff_lines, digest_new=new_diff_lines, type=
'Digest')
193 print(
'Updating reference files')
194 print(
'========================')
196 for branch, tests
in failing_tests.items():
198 print(
'Processing test: {} on branch {}'.format(test.name, branch))
199 if test.type ==
'DiffPool':
200 print(
' * This is a DiffPool test, and currently has version {} of {}. Will update References.py with new version.'.format(test.existing_version, test.tag))
202 print(
' -> The new version is: {}. Creating directory and copying files on EOS now.'.format(test.new_version))
208 commands = list(dict.fromkeys(commands))
211 if update_local_files:
214 print (
'Updating local References.py file with new version {} for tag {}'.format(test.new_version, test.tag))
216 with open(
'Tools/WorkflowTestRunner/python/References.py',
'r')
as f:
217 lines = f.readlines()
220 if test.existing_version
in line:
221 line = line.replace(test.existing_version, test.new_version)
224 print(
'** WARNING: For tag {} we were looking for existing version {}, but the line in the file is: {}'.format(test.tag, test.existing_version, line), end=
'')
225 print(
'** Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!')
230 print(
'** WARNING - no matching line was found for the AMI tag {} in References.py. Are you sure your branch is up-to-date with main? We cannot update an older version of References.py!'.format(test.tag))
232 with open(
'Tools/WorkflowTestRunner/python/References.py',
'w')
as f:
234 elif test.type ==
'Digest' and update_local_files:
235 print(
' * This is a Digest test. Need to update reference file {}.'.format(test.existing_ref))
239 with open(
'Tools/PROCTools/data/'+test.existing_ref,
'r')
as f:
240 lines = f.readlines()
241 for current_line, line
in enumerate(lines):
242 split_curr_line = line.split()
243 if (split_curr_line[0] ==
'run'):
248 if (
not split_curr_line[0].isnumeric())
or (
not split_curr_line[1].isnumeric()):
249 print(
'FATAL: Found a line in current digest which does not start with run/event numbers: {}'.format(line))
252 split_old_diff_line = test.digest_old[diff_line].
split()
253 split_old_diff_line.pop(0)
254 split_new_diff_line = test.digest_new[diff_line].
split()
255 split_new_diff_line.pop(0)
258 if split_curr_line[0] == split_old_diff_line[0]
and split_curr_line[1] == split_old_diff_line[1]:
260 if split_curr_line!=split_old_diff_line:
261 print(
'FATAL: It seems like this line was already changed.')
262 print(
'Line we expected: {}'.format(test.old_diff_lines[diff_line]))
263 print(
'Line we got : {}'.format(line))
267 if split_curr_line[0] == split_new_diff_line[0]
and split_curr_line[1] == split_new_diff_line[1]:
269 data.append(
"".join([
"{:>12}".format(x)
for x
in split_new_diff_line])+
'\n')
270 if ((diff_line+1)<len(test.digest_old)):
277 print(
' -> Updating PROCTools digest file {}'.format(test.existing_ref))
278 with open(
'Tools/PROCTools/data/'+test.existing_ref,
'w')
as f:
285 If called with actually_update=False, this function will return a list of commands which would have been executed.
288 if test.new_version_directory
not in dirs_created:
289 commands.append(
"mkdir -p " + test.new_version_directory)
290 dirs_created.append(test.new_version_directory)
294 commands.append(
"cp " + test.copied_file_path +
"* "+ test.new_version_directory+
"/")
295 commands.append(
"cp -n " + test.existing_ref +
"/* "+ test.new_version_directory+
"/")
297 print(
' -> Copying files from {} to {}'.format(test.copied_file_path, test.new_version_directory))
299 for command
in commands:
301 subprocess.call( command, shell=
True)
302 except Exception
as e:
303 print(
'Command failed due to:', e)
304 print(
'Do you have EOS available on this machine?')
305 except Exception
as e:
306 print(
'FATAL: Unable to copy files due to:', e)
309 f = open(test.new_version_directory+
'/info.txt',
'w')
310 f.write(
'Merge URL: https://gitlab.cern.ch/atlas/athena/-/merge_requests/{}\n'.format(test.mr))
311 f.write(
'Date: {}\n'.format(test.date))
312 f.write(
'AMI: {}\n'.format(test.tag))
313 f.write(
'Test name: {}\n'.format(test.name))
314 f.write(
'Files copied from: {}\n'.format(test.copied_file_path))
322 if (
'ERROR' in row[0]):
326 url = href[href.find(
'"')+1:]
327 url = url[:url.find(
'"')]
331 value = href[href.find(
'>')+1:]
332 value = value[:value.find(
'<')]
341 test_error_counts = int(test_counts.split(
' ')[0])
342 if test_error_counts > 0:
344 project_url =
'https://bigpanda.cern.ch'+
strip_url(project[11])
345 headers = {
'Accept':
'application/json'}
346 r = requests.get(project_url+
'&json', headers=headers)
347 data = r.json()[
"rows_s"]
351 headers = {
'Accept':
'application/json'}
352 r = requests.get(url+
'&json', headers=headers)
353 data = r.json()[
"rows_s"]
356 for project
in data[1:]:
360 print(
'Summary of tests which need work:')
362 if not failing_tests:
363 print(
" -> None found. Aborting.")
367 reference_folders = []
368 for branch,tests
in failing_tests.items():
369 print (
' * Branch: {}'.format(branch))
372 if test.type ==
'DiffPool':
373 if not test.new_version_directory:
374 print(
'FATAL: No path to "new version" for test {} of type DiffPool.'.format(test.name))
377 if os.path.exists(test.new_version_directory):
378 msg = f
'WARNING: The directory {test.new_version_directory} already exists. Are you sure you want to overwrite the existing references?'
379 if input(
"%s (y/N) " % msg).lower() !=
'y':
382 if (test.existing_ref
not in reference_folders):
383 reference_folders.append(test.existing_ref)
384 elif check_for_duplicates:
385 print(
'FATAL: Found two tests which both change the same reference file: {}, which is not supported.'.format(test.existing_ref))
386 print(
'Consider running again in --test-run mode, to get a copy of the copy commands that could be run.')
387 print(
'The general advice is to take the largest file (since it will have the most events), and/or take the non-legacy one.')
390 return 'https://gitlab.cern.ch/atlas/athena/-/merge_requests/'+mr
392if __name__ ==
'__main__':
393 parser = argparse.ArgumentParser(description=__doc__,
394 formatter_class=argparse.RawDescriptionHelpFormatter)
395 parser.add_argument(
'url', help=
'URL to CITest (put in quotes))')
396 parser.add_argument(
'--test-run',help=
'Update local text files, but do not actually touch EOS.', action=
'store_true')
397 args = parser.parse_args()
398 print(
'Update reference files for URL: {}'.format(args.url))
400 if not args.url.startswith((
'http://',
'https://')):
401 print(
'invalid url - should start with http:// or https://')
407 print(
' -> Running in test mode so will not touch EOS, but will only modify files locally (these changes can easily be reverted with "git checkout" etc).')
409 print(
'========================')
414 print(
'========================')
417 gl_project = gitlab.Gitlab(
"https://gitlab.cern.ch").projects.get(
"atlas/athena")
418 mr = gl_project.mergerequests.get(mr_url.split(
'/')[-1])
419 author = mr.author[
'username']
420 remote = f
'https://:@gitlab.cern.ch:8443/{author}/athena.git'
421 local_branch = f
'mr-{mr.iid}'
423 print(
"The next step is to update the MR with the new content i.e. the References.py file and the digest files.")
424 print(
" IMPORTANT: before you do this, you must first make sure that the local repository is on same branch as the MR by doing:")
425 print(f
" $ git fetch --no-tags {remote} {mr.source_branch}:{local_branch}")
426 print(f
" $ git switch {local_branch}")
427 print(
" $ git rebase upstream/main")
430 msg =
'Would you like to (locally) update digest ref files and/or versions in References.py?'
431 update_local_files =
False
432 if input(
"%s (y/N) " % msg).lower() ==
'y':
433 not_in_athena_dir = subprocess.call(
"git rev-parse --is-inside-work-tree", shell=
True)
434 if not_in_athena_dir:
435 print(
'FATAL: You must run this script from within the athena directory.')
437 update_local_files =
True
441 if commands
and args.test_run:
443 print(
' -> In test-run mode. In normal mode we would also have executed:')
444 for command
in commands:
446 if not args.test_run:
448 print(
"Finished! Before pushing, you might want to manually trigger an EOS to cvmfs copy here: https://atlas-jenkins.cern.ch/view/all/job/ART_data_eos2cvmfs/")
449 print(
"Then commit your changes and (force) push the updated branch to the author's remote:")
450 print(
" $ git commit")
451 print(f
" $ git push [-f] {remote} {local_branch}:{mr.source_branch}")
void print(char *figname, TCanvas *c1)
__init__(self, name, tag, mr, date, existing_ref, existing_version, new_version, new_version_directory, copied_file_path, digest_old, digest_new, type)
std::vector< std::string > split(const std::string &s, const std::string &t=":")
process_digest_change(text, ami_tag, mr_number, human_readable_date, test_name)
create_dir_and_copy_refs(test, actually_update=False)
process_diffpool_change(text, ami_tag, mr_number, human_readable_date, test_name)
summarise_failing_tests(check_for_duplicates=True)
process_log_file(url, branch, test_name)
extract_links_from_json(url)
process_CI_Builds_Summary(project)
process_CI_Tests_json(data)
update_reference_files(actually_update=True, update_local_files=False)