# ATLAS Offline Software: GeneratorSettingsSemantics.py
# Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
2
from collections import Counter
from enum import Enum, IntEnum
import re

from GaudiConfig2.semantics import getSemanticsFor, SequenceSemantics
from AthenaCommon.Logging import logging
8
# Module-level logger used for the duplicate/conflict warnings.
genSettingsLog = logging.getLogger("GeneratorSettingsSemantics")
10
11
class GeneratorSettingsKeep(str, Enum):
    """
    Which duplicate setting survives after layers are sorted by precedence.

    Layers are resolved in increasing precedence order: BASE, then TUNE, then
    USER. A duplicate is either a parsed command with the same normalized key,
    or an unparsed command with the same normalized full text.

    FIRST keeps the first duplicate encountered. This preserves lower-precedence
    defaults when later layers repeat the same setting.

    LAST keeps the last duplicate encountered. This is the normal generator
    behavior: tune settings override base settings, and user settings override
    both.
    """

    FIRST = "first"
    LAST = "last"
29
30
class GeneratorSettingsRecord(str, Enum):
    """
    How one command string is understood during merging.

    Each command is parsed with the separators from GeneratorSettingsLayer.
    Commands that contain one of those separators are treated as parsed
    key/value commands, e.g. "Main:timesAllowErrors = 500". These can override
    each other by key.

    Commands without any configured separator are unparsed commands. They have
    no key, so they can only be deduplicated when their normalized full text is
    identical.
    """

    PARSED_COMMAND = "parsed_command"
    UNPARSED_COMMAND = "unparsed_command"
46
47
# NOTE(review): the class statement was missing from the extracted listing.
# IntEnum is assumed because the module imports it and the rest of the file
# uses int(layer.precedence) and layer.precedence.name - confirm upstream.
class GeneratorSettingsPrecedence(IntEnum):
    """
    Precedence levels for generator settings layers.
    Lower precedence layers are overridden by higher ones during merging.
    """

    BASE = 10
    TUNE = 20
    USER = 100
56
57
# NOTE(review): the class statement was missing from the extracted listing;
# the name is taken from its uses elsewhere in this module.
class GeneratorSettingsLayer:
    """
    A layer contains all generator settings from a single source,
    e.g. a user fragment, tune, etc.

    Normal assignments still use a plain sequence of strings. Layers are only
    needed by configuration fragments that want to describe where commands came
    from and how they should be ordered during CA merging.
    """

    def __init__(self,
                 source,
                 values,
                 precedence,
                 separators="=",
                 keep=GeneratorSettingsKeep.LAST,
                 report_context=None):
        """
        Store one source's commands together with its merge options.

        Args:
            source: Label used in conflict/duplicate reports (stored as str).
            values: Sequence of generator command strings; None or an empty
                sequence yields an empty layer.
            precedence: Precedence used when ordering layers for merging.
            separators: One separator string, or a sequence of them, used to
                split a command into a key/value pair.
            keep: Which matching key wins after sorting by precedence.
            report_context: Text mentioned in the deduplication report.

        Raises:
            TypeError: If values is a single string rather than a sequence.
        """
        # A bare string would otherwise be split into one command per character.
        if isinstance(values, str):
            raise TypeError("GeneratorSettingsLayer values must be a sequence")

        # Label used in conflict/duplicate reports
        self.source = str(source)

        # Generator command strings belonging to this layer
        self.values = tuple(values or ())

        # Precedence used for deduplication and conflict resolution during merging
        # NOTE(review): this assignment was missing from the extracted listing.
        # Coercing through the enum is an assumption (later code reads
        # precedence.name) - confirm against the upstream file.
        self.precedence = GeneratorSettingsPrecedence(precedence)

        # Separators used to parse a generator setting command into a key/value
        # pair for duplicate detection
        if isinstance(separators, str):
            self.separators = (separators,)
        else:
            self.separators = tuple(separators)

        # Which matching key wins after sorting by layer precedence
        # NOTE(review): this assignment was missing from the extracted listing.
        # Coercing through the enum is an assumption (later code reads
        # keep.value) - confirm against the upstream file.
        self.keep = GeneratorSettingsKeep(keep)

        # Text that should be mentioned in the deduplication report,
        # usually the property name from the fragment.
        self.report_context = report_context
99
100
# NOTE(review): the class statement was missing from the extracted listing;
# the name is taken from its uses elsewhere in this module.
class GeneratorSettingsValue:
    """
    Stored property value used by GeneratorSettingsSemantics.

    It keeps the unresolved layers rather than a pre-merged list so that
    CA merges do not depend on merge order: base.merge(user) and
    user.merge(base) yield the same result.
    """

    def __init__(self, layers=None):
        """
        Collect the given layers, dropping exact duplicates.

        Args:
            layers: Iterable of GeneratorSettingsLayer objects, or None.

        Raises:
            TypeError: If an element is not a GeneratorSettingsLayer.
        """
        self.layers = []
        # Report contexts that resolve() has already logged.
        # NOTE(review): this initialisation was missing from the extracted
        # listing; it is required because resolve() reads and updates it.
        self._reported_contexts = set()
        seen = set()

        for layer in layers or ():
            if not isinstance(layer, GeneratorSettingsLayer):
                layer_type = type(layer).__name__
                raise TypeError(
                    f"GeneratorSettingsValue layers must be "
                    f"GeneratorSettingsLayer instances, got {layer_type}"
                )

            # Deduplicate identical layers explicitly instead of relying on
            # object identity. This is what makes repeated symmetric CA merges
            # yield the same result.
            key = (
                layer.source,
                layer.values,
                layer.precedence,
                layer.separators,
                layer.keep,
                layer.report_context,
            )
            if key in seen:
                continue
            seen.add(key)
            self.layers.append(layer)

    @property
    def data(self):
        """Return the resolved command list without emitting a report."""
        return self.resolve()

    def resolve(self, report_context=None, emit_report=False):
        """
        Resolve all layers to the final settings list.

        If emit_report is True, log duplicate/conflict warnings at most once
        per report context. A report_context set on a layer takes priority
        over the report_context argument.
        """
        settings, report = self._resolve()

        if emit_report:
            layer_context = next(
                (
                    settings_layer.report_context
                    for settings_layer in _ordered_layers(self.layers)
                    if settings_layer.report_context
                ),
                None,
            )
            if layer_context:
                report_context = layer_context

            if report_context and report_context not in self._reported_contexts:
                _log_report(report_context, report)
                self._reported_contexts.add(report_context)

        return settings

    def __str__(self):
        """
        Return the resolved list as a string.
        ComponentAccumulator.gatherProps() converts stored property values
        to strings before handing them to Gaudi.
        """
        return str(self.resolve(emit_report=True))

    def _resolve(self):
        """
        Return the final command list plus the duplicate/conflict report.

        Raises:
            ValueError: If layers disagree on separators/keep, or if two
                distinct layers share the same precedence.
        """
        layers = _ordered_layers(self.layers)
        # The lowest-precedence layer defines the parsing options that every
        # other layer has to match.
        if layers:
            separators, keep = layers[0].separators, layers[0].keep
        else:
            separators, keep = ("=",), GeneratorSettingsKeep.LAST

        layer_by_precedence = {}
        for layer in layers:
            if layer.separators != separators or layer.keep != keep:
                summary = ", ".join(
                    f"{entry.source}: separators={entry.separators}, "
                    f"keep={entry.keep.value}"
                    for entry in layers
                )
                raise ValueError(
                    f"cannot merge generator settings with different parsing "
                    f"settings: "
                    f"{summary}"
                )

            if layer.precedence in layer_by_precedence:
                previous_layer = layer_by_precedence[layer.precedence]
                raise ValueError(
                    f"cannot merge generator settings with duplicate "
                    f"precedence {layer.precedence.name}: "
                    f"{previous_layer.source} and {layer.source}"
                )
            layer_by_precedence[layer.precedence] = layer

        records = _build_records(layers, separators)
        kept_records, removed_records = _deduplicate_records(records, keep)
        return (
            [record["original_setting"] for record in kept_records],
            _build_report(records, kept_records, removed_records),
        )
208
209
class GeneratorSettingsSemantics(SequenceSemantics):
    """
    Semantics for generator settings properties.

    Gaudi sees the property as a sequence of strings, but CA merging receives
    GeneratorSettingsLayer objects so fragments can be combined by precedence
    before the final list is handed to the C++ component.
    """

    __handled_types__ = [re.compile(r"^GeneratorSettings<.*>$")]

    def __init__(self, cpp_type):
        # SequenceSemantics needs the element semantics even though we store a
        # custom helper value instead of Gaudi's normal list helper.
        super().__init__(cpp_type, valueSem=getSemanticsFor("std::string"))

    def store(self, assigned_value):
        """
        Convert user input into the internal unresolved layer container.

        A GeneratorSettingsValue is returned unchanged, a single layer is
        wrapped, and any other sequence becomes one USER-precedence layer.

        Raises:
            TypeError: If assigned_value is a plain string.
        """
        if isinstance(assigned_value, GeneratorSettingsValue):
            return assigned_value
        if isinstance(assigned_value, GeneratorSettingsLayer):
            return GeneratorSettingsValue([assigned_value])
        if isinstance(assigned_value, str):
            raise TypeError("generator settings must be assigned from a sequence")
        # NOTE(review): the opening of this return statement was missing from
        # the extracted listing; reconstructed from the surviving keyword
        # arguments and closing brackets - confirm against the upstream file.
        return GeneratorSettingsValue([
            GeneratorSettingsLayer(
                source=self.name or "<unknown>",
                values=tuple(assigned_value or ()),
                precedence=GeneratorSettingsPrecedence.USER,
                report_context=self.name,
            )
        ])

    def default(self, default_commands):
        """
        Store C++ defaults as the lowest-precedence command layer.

        Raises:
            TypeError: If default_commands is a non-empty plain string.
        """
        if not default_commands:
            # NOTE(review): this line was missing from the extracted listing;
            # returning an empty container is an assumption - confirm upstream.
            return GeneratorSettingsValue()
        if isinstance(default_commands, str):
            raise TypeError("generator settings must be assigned from a sequence")
        # NOTE(review): the opening of this return statement was missing from
        # the extracted listing; reconstructed from the surviving keyword
        # arguments and closing brackets - confirm against the upstream file.
        return GeneratorSettingsValue([
            GeneratorSettingsLayer(
                source=self.name or "<default>",
                values=tuple(default_commands or ()),
                precedence=GeneratorSettingsPrecedence.BASE,
                report_context=self.name,
            )
        ])

    def merge(self, current_value, incoming_value):
        """Merge two unresolved layer sets during CA merging."""
        current_settings = self.store(current_value)
        incoming_settings = self.store(incoming_value)
        # NOTE(review): the opening of this return statement was missing from
        # the extracted listing; reconstructed - confirm upstream.
        return GeneratorSettingsValue(
            current_settings.layers + incoming_settings.layers
        )

    def opt_value(self, stored_value):
        """
        Return the final plain command list.
        This is the point where all layers are resolved.
        Print duplicate and conflict warnings here.
        """
        settings_value = self.store(stored_value)
        return settings_value.resolve(report_context=self.name, emit_report=True)
273
274
def _ordered_layers(layers):
    """
    Apply precedence before deduplication.

    Returns a new list sorted by integer precedence (lowest first). Source
    name and the repr of the values act as tie-breakers so the order does not
    depend on the order in which layers were supplied.
    """
    def sort_key(layer):
        return (int(layer.precedence), layer.source, repr(layer.values))

    return sorted(layers, key=sort_key)
285
286
def _build_records(layers, separators):
    """
    Convert raw command strings into normalized records.
    Parsed commands are deduplicated by key. Commands that cannot be parsed as
    key/value records are deduplicated by their normalized full text.

    Returns one dict per command, in layer order and then command order.
    """
    records = []
    for layer in layers:
        for raw_setting in layer.values:
            key_text, value_text = _parse_assignment(raw_setting, separators)
            if key_text is None:
                # No separator found: the whole normalized text is the identity.
                record_kind = GeneratorSettingsRecord.UNPARSED_COMMAND
                normalized_key = None
                normalized_value = None
                signature_text = _normalize_text(raw_setting)
            else:
                record_kind = GeneratorSettingsRecord.PARSED_COMMAND
                normalized_key = _normalize_text(key_text)
                normalized_value = _normalize_text(value_text)
                signature_text = normalized_key

            records.append({
                "source_name": layer.source,
                "record_kind": record_kind,
                "normalized_key": normalized_key,
                "normalized_value": normalized_value,
                "original_setting": raw_setting,
                "dedup_signature": (record_kind, signature_text),
            })
    return records
324
325
def _parse_assignment(setting_text, separators):
    """
    Split a command into a stripped (key, value) pair.

    Separators are tried in the given order; the first one that occurs in the
    text is used, and the text is split at its first occurrence. Empty
    separator strings are ignored because they cannot split anything.

    Returns:
        (key, value) as stripped strings, or (None, None) when no separator
        occurs in the text.
    """
    text = str(setting_text).strip()
    for separator in separators:
        if not separator:
            # str.split("") / str.partition("") raise ValueError.
            continue
        key_text, found, value_text = text.partition(separator)
        if found:
            return key_text.strip(), value_text.strip()
    return None, None
333
334
# NOTE(review): the def line was missing from the extracted listing; the name
# and single parameter are taken from the call sites and the surviving body.
def _normalize_text(value):
    """Return str(value) with surrounding whitespace removed and every inner
    whitespace run collapsed to a single space."""
    # split() without arguments already discards leading/trailing whitespace.
    return " ".join(str(value).split())
338
339
def _deduplicate_records(records, keep):
    """
    Keep the first or last record for each normalized setting key.

    Returns:
        (kept_records, removed_records), both in the original record order.
        Each removed record is a copy of the input record with three extra
        keys describing the record that was kept instead of it.
    """
    keep_last = keep == GeneratorSettingsKeep.LAST
    # Scanning backwards turns "keep the last" into "keep the first seen".
    scan_order = reversed(records) if keep_last else records

    winner_by_signature = {}
    kept_records = []
    removed_records = []
    for record in scan_order:
        signature = record["dedup_signature"]
        if signature not in winner_by_signature:
            winner_by_signature[signature] = record
            kept_records.append(record)
            continue

        winner = winner_by_signature[signature]
        removed_records.append({
            **record,
            "duplicate_of_source_name": winner["source_name"],
            "kept_normalized_value": winner["normalized_value"],
            "kept_original_setting": winner["original_setting"],
        })

    if keep_last:
        # Undo the backwards scan so callers see the original ordering.
        kept_records.reverse()
        removed_records.reverse()
    return kept_records, removed_records
366
367
def _build_report(records, kept_records, removed_records):
    """
    Collect duplicate and conflict information for logging/tests.

    A removed record counts as "identical" when its normalized value equals
    the value that was kept, and as "overridden" otherwise. Returns a dict
    with the removed entries, the conflict summaries and per-source duplicate
    counts.
    """
    removed_duplicates = [
        {
            "source": record["source_name"],
            "duplicate_of_source": record.get("duplicate_of_source_name"),
            "setting": record["original_setting"],
            "kept_setting": record.get("kept_original_setting"),
            "normalized_value": record["normalized_value"],
            "kept_normalized_value": record.get("kept_normalized_value"),
        }
        for record in removed_records
    ]

    # Split once instead of re-evaluating the same comparison per output list.
    removed_identical = []
    removed_overridden = []
    for entry in removed_duplicates:
        if entry["normalized_value"] == entry["kept_normalized_value"]:
            removed_identical.append({
                "source": entry["source"],
                "duplicate_of_source": entry["duplicate_of_source"],
                "setting": entry["setting"],
                "kept_setting": entry["kept_setting"],
            })
        else:
            removed_overridden.append(entry)

    duplicates_in_source = Counter(
        entry["source"]
        for entry in removed_identical
        if entry["duplicate_of_source"] == entry["source"]
    )
    duplicates_across_sources = Counter(
        (entry["source"], entry["duplicate_of_source"])
        for entry in removed_identical
        if entry["duplicate_of_source"] != entry["source"]
    )

    return {
        "removed_duplicates": removed_duplicates,
        "removed_identical": removed_identical,
        "conflicting_reassignments": removed_overridden,
        # Separate list object, so changing one list does not alter the other.
        "removed_overridden": list(removed_overridden),
        "conflict_details": _build_conflict_details(records, kept_records),
        "conflicts": _find_conflicts(records),
        "duplicates_in_source": dict(duplicates_in_source),
        "duplicates_across_sources": [
            {
                "source": source_name,
                "duplicate_of_source": duplicate_of_source,
                "count": duplicate_count,
            }
            for (source_name, duplicate_of_source), duplicate_count
            in sorted(duplicates_across_sources.items())
        ],
    }
437
438
def _build_conflict_details(records, kept_records):
    """
    Build one reporting entry per conflicting parsed key.

    A key is conflicting when it appears with more than one distinct value
    and in more than one source. The source and value order follows the
    original record order, and the kept value is marked explicitly in the
    value list.
    """
    kept_value_by_key = {}
    for record in kept_records:
        if record["record_kind"] != GeneratorSettingsRecord.PARSED_COMMAND:
            continue
        kept_value_by_key[record["normalized_key"]] = record["normalized_value"]

    # key -> (sources, values); dicts are used as insertion-ordered sets.
    seen_by_key = {}
    for record in records:
        if record["record_kind"] != GeneratorSettingsRecord.PARSED_COMMAND:
            continue
        sources, values = seen_by_key.setdefault(
            record["normalized_key"], ({}, {})
        )
        sources.setdefault(record["source_name"])
        values.setdefault(record["normalized_value"])

    conflict_details = []
    for key, (sources, values) in seen_by_key.items():
        if len(values) <= 1 or len(sources) <= 1:
            continue

        kept_value = kept_value_by_key.get(key)
        conflict_details.append({
            "key": key,
            "sources": list(sources),
            "values": [
                f"{value} (kept)" if value == kept_value else value
                for value in values
            ],
        })
    return conflict_details
501
502
def _find_conflicts(records):
    """
    Find keys assigned to multiple values within or across sources.

    For each parsed key, one "intra_source_conflict" entry is emitted per
    source that sets it to more than one value, followed by one
    "inter_source_conflict" entry when several sources together use more
    than one value.
    """
    values_by_key_and_source = {}
    for record in records:
        if record["record_kind"] != GeneratorSettingsRecord.PARSED_COMMAND:
            continue
        per_source = values_by_key_and_source.setdefault(
            record["normalized_key"], {}
        )
        per_source.setdefault(record["source_name"], set()).add(
            record["normalized_value"]
        )

    conflicts = []
    for key, source_to_values in values_by_key_and_source.items():
        conflicts.extend(
            {
                "type": "intra_source_conflict",
                "key": key,
                "source": source_name,
                "values": sorted(values),
            }
            for source_name, values in source_to_values.items()
            if len(values) > 1
        )

        merged_values = set().union(*source_to_values.values())
        if len(merged_values) > 1 and len(source_to_values) > 1:
            conflicts.append({
                "type": "inter_source_conflict",
                "key": key,
                "sources": sorted(source_to_values),
                "values": sorted(merged_values),
            })
    return conflicts
540
541
def _log_report(context, report):
    """
    Print warnings from the structured report.

    Emits nothing when the report holds neither identical duplicates nor
    conflict details. Otherwise logs a summary line, then one warning per
    duplicate (sorted by source, duplicate source and setting) and one per
    conflicting key.
    """
    issue_prefix = "Potential issue with generator settings"

    duplicates = report.get("removed_identical", [])
    conflict_details = report.get("conflict_details", [])
    if not duplicates and not conflict_details:
        return

    genSettingsLog.warning(
        f"{issue_prefix} [{context}]: found {len(duplicates)} duplicate "
        f"setting(s) across sources and {len(conflict_details)} conflicting "
        f"setting key(s)"
    )

    def duplicate_sort_key(entry):
        return (
            entry.get("source", ""),
            entry.get("duplicate_of_source", ""),
            entry.get("setting", ""),
        )

    for entry in sorted(duplicates, key=duplicate_sort_key):
        source_name = entry.get("source", "<unknown>")
        duplicate_of_source = entry.get("duplicate_of_source", "<unknown>")
        setting = entry.get("setting", "<unknown>")
        # Name the source only once when a layer repeats its own setting.
        source_list = [source_name]
        if duplicate_of_source != source_name:
            source_list.append(duplicate_of_source)
        genSettingsLog.warning(
            f"{issue_prefix} [{context}]: duplicate setting from sources "
            f"[{', '.join(source_list)}]: {setting}"
        )

    for entry in conflict_details:
        key = entry.get("key", "<unknown>")
        sources = ", ".join(entry.get("sources", []))
        values = ", ".join(entry.get("values", []))
        genSettingsLog.warning(
            f"{issue_prefix} [{context}]: conflicting setting '{key}' across "
            f"sources [{sources}] -> [{values}]"
        )
# Cross-reference tooltips left over from the generated documentation page
# (not part of the module):
#   __init__(self, source, values, precedence, separators="=", keep=GeneratorSettingsKeep.LAST, report_context=None)
#   STL class.
#   bool add(const std::string &hname, TKey *tobj)
#   Definition fastadd.cxx:55
#   Definition merge.py:1
#   _build_report(records, kept_records, removed_records)