ATLAS Offline Software
MetaDiff.py
Go to the documentation of this file.
1 """The function in this module you should look to be using is meta_diff"""
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
4 # This script reads metadata from a given file
5 from __future__ import print_function
6 
7 import logging
8 import re
9 
10 from PyUtils.MetaReader import read_metadata, trigger_keys
11 
12 
def summary(content):
    """Create a short, human-readable summary string for an object.

    Strings are returned unchanged.  Objects providing ``items()`` and
    iterables of key/value pairs are rendered as concatenated
    ``"key: value, "`` fragments, each value being summarised recursively.
    Other sized, indexable objects with three or more entries are shortened
    to ``"[first, second, ..., last]"``.  Anything else is converted with
    ``str()``.

    Arguments:
    content -- the object to summarise

    Returns the summary as a string.
    """
    if isinstance(content, str):
        return content

    try:
        try:
            pairs = content.items()
        except AttributeError:
            pairs = content

        fragments = []
        for entry in pairs:
            if isinstance(entry, (str, bytes)):
                # A two-character string unpacks into two values without
                # error, which would turn ["ab", "cd"] into "a: b, c: d, ".
                # Text is never a key/value pair, so use the list rendering.
                raise TypeError("text entry is not a key/value pair")
            key, value = entry
            fragments.append("{}: {}, ".format(key, summary(value)))
        return ''.join(fragments)
    except (TypeError, ValueError):
        # not a collection of key/value pairs; try the list-like rendering
        pass

    try:
        if len(content) < 3:
            return str(content)
        return "[{}, {}, ..., {}]".format(
            summary(content[0]), summary(content[1]), summary(content[-1])
        )
    except TypeError:
        # unsized or not indexable (e.g. a number or a set)
        pass

    return str(content)
40 
41 
def truncateDict(value):
    """Create a truncated string, replacing nested dicts with {...}

    The items of the dictionary are sorted, each one is rendered as
    "key: value" (a dict value is shown as the placeholder "{...}"), and the
    pieces are joined with ", ".
    """
    def shown(entry):
        # nested dictionaries are hidden behind a placeholder
        if isinstance(entry, dict):
            return '{...}'
        return entry

    pieces = []
    for name, entry in sorted(value.items()):
        pieces.append('{}: {}'.format(name, shown(entry)))
    return ', '.join(pieces)
53 
54 
def print_diff(parent_key, obj1, obj2, diff_format, filter_key, key_only):
    """Build the comparison string for two non-dictionary objects.

    Arguments:
    parent_key  -- key under which the two objects are stored
    obj1        -- first object in the comparison
    obj2        -- second object in the comparison
    diff_format -- "simple" for one-line messages, anything else for a
                   diff-style "<"/">" listing
    filter_key  -- optional callable; when it returns False for parent_key
                   the key is skipped
    key_only    -- report only the key, not the values

    Returns the formatted string, or None when the key is filtered out.
    The objects passed in are never modified.
    """
    if filter_key is not None and filter_key(parent_key) is False:
        # skip this key
        return None

    result = "\n"

    if diff_format == "simple":
        if not obj1:
            result += "{} has been inserted".format(parent_key)
        elif not obj2:
            result += "{} has been deleted".format(parent_key)
        elif key_only:
            result += "{} has changed".format(parent_key)
        else:
            result += "{} has changed from '{}' to '{}'".format(
                parent_key, obj1, obj2
            )
        result += "\n"
        return result

    if parent_key is not None:
        if key_only:
            result += "{}".format(parent_key)
        else:
            result += "{}:\n".format(parent_key)
            # Show only the entries that differ.  The shared entries are
            # removed from copies so the caller's objects stay intact.
            try:
                overlap = set(obj1).intersection(obj2)
                left, right = obj1.copy(), obj2.copy()
                for item in overlap:
                    left.remove(item)
                    right.remove(item)
            except (AttributeError, TypeError):
                # not a collection supporting this (e.g. a number or a
                # string): show the objects as they are
                left, right = obj1, obj2
            result += " < {}\n ----------\n > {}\n ".format(
                summary(left), summary(right)
            )

    return result
101 
102 
def print_diff_type(parent_key, obj1, obj2, diff_format, filter_key, key_only):
    """Build the diff string for two objects of different type.

    Arguments:
    parent_key  -- key under which the two objects are stored
    obj1        -- first object in the comparison
    obj2        -- second object in the comparison
    diff_format -- "simple" for one-line messages, anything else for a
                   diff-style "<"/">" listing
    filter_key  -- optional callable; when it returns False for parent_key
                   the key is skipped
    key_only    -- leave the values out of the report

    Returns the formatted string, or None when the key is filtered out.
    """
    if filter_key is not None and filter_key(parent_key) is False:
        # skip this key
        return None

    result = "\n"

    if diff_format == "simple":
        if obj1 is None:
            result += "{} has been inserted".format(parent_key)
        elif obj2 is None:
            result += "{} has been deleted".format(parent_key)
        elif key_only:
            result += "{} has changed type from {} to {}".format(
                parent_key, type(obj1), type(obj2)
            )
        else:
            result += (
                "{} has changed type from {} (value: '{}') to "
                "{} (value: '{}')"
            ).format(parent_key, type(obj1), obj1, type(obj2), obj2)
        result += "\n"
        return result

    if parent_key is not None:
        if key_only:
            result += "{}".format(parent_key)
        else:
            result += "{}:\n".format(parent_key)
            result += (
                " < {} (type: {})\n ----------\n > {} (type: {})\n "
            ).format(summary(obj1), type(obj1), summary(obj2), type(obj2))

    return result
145 
146 
def print_diff_dict_keys(parent_key, obj1, obj2, diff_format, filter_key, key_only):
    """Build the diff string for two dictionaries.

    Arguments:
    parent_key  -- key under which the two dictionaries are stored
    obj1        -- first dictionary in the comparison
    obj2        -- second dictionary in the comparison
    diff_format -- "simple" for one-line messages, anything else for a
                   diff-style "<"/">" listing
    filter_key  -- optional callable; when it returns False for parent_key
                   the key is skipped
    key_only    -- report only the key, not the content

    Returns the formatted string, or None when the key is filtered out.
    The dictionaries passed in are never modified.
    """
    if filter_key is not None and filter_key(parent_key) is False:
        # skip this key
        return None

    result = '\n'

    if diff_format == 'simple':
        if obj1 is None:
            result += "{} has been inserted".format(parent_key)
        elif obj2 is None:
            result += "{} has been deleted".format(parent_key)
        elif key_only:
            result += "{} has changed".format(parent_key)
        else:
            result += "{} has changed from '{}' to '{}'".format(
                parent_key, truncateDict(obj1), truncateDict(obj2)
            )
    elif parent_key is not None:
        if key_only:
            result += "{}".format(parent_key)
        else:
            # Entries that are identical on both sides carry no information
            # for the diff.  They are left out of filtered copies so that
            # the caller's dictionaries are not altered.
            identical = {
                k for k in set(obj1).intersection(obj2) if obj1[k] == obj2[k]
            }
            left = {k: v for k, v in obj1.items() if k not in identical}
            right = {k: v for k, v in obj2.items() if k not in identical}
            result += "{}:\n".format(parent_key)
            result += " < {}\n ----------\n > {}\n ".format(
                summary(left), summary(right)
            )

    result += "\n"

    return result
197 
198 
def compare(obj1, obj2, parent_key=None, ordered=False, diff_format="simple", filter_key=None, key_only=False):
    """Calculate the difference between two objects.

    Keyword arguments:
    obj1        -- first object in comparison
    obj2        -- second object in comparison
    parent_key  -- the key of the objects in the parent, used in recursion
    ordered     -- whether to check order of list content
    diff_format -- format handed on to the print_* helpers
    filter_key  -- optional callable handed on to the print_* helpers
    key_only    -- handed on to the print_* helpers

    Returns a list with one entry per difference found.  An entry is whatever
    the print_* helper returned, which is None for a filtered key.  The list
    is empty when the objects are equal.  The objects passed in are not
    modified.
    """
    def normalised(obj):
        # Unordered comparison of lists works on a sorted copy, so the
        # caller's list keeps its order.  Lists whose elements cannot be
        # ordered (e.g. lists of dicts) are compared as they are.
        if ordered or not isinstance(obj, list):
            return obj
        try:
            return sorted(obj)
        except TypeError:
            return obj

    obj1 = normalised(obj1)
    obj2 = normalised(obj2)

    if obj1 == obj2:
        return []

    if not isinstance(obj1, type(obj2)):
        return [print_diff_type(parent_key, obj1, obj2, diff_format, filter_key, key_only)]

    if not isinstance(obj1, dict):
        return [print_diff(parent_key, obj1, obj2, diff_format, filter_key, key_only)]

    if set(obj1) != set(obj2):
        # differing key sets are reported as one difference of the parent
        return [print_diff_dict_keys(parent_key, obj1, obj2, diff_format, filter_key, key_only)]

    # same keys on both sides: descend and collect the differences per key
    result = []
    for key in sorted(obj1):
        if parent_key:
            child_key = "{}/{}".format(parent_key, key)
        else:
            child_key = key
        result += compare(
            obj1[key], obj2[key], child_key, ordered, diff_format, filter_key, key_only
        )

    return result
244 
245 
def compare_dicts(test, reference, ordered=False, diff_format="simple", filter_key = None, key_only = False):
    """Show the differences between two dictionaries

    Every key found in either dictionary is handed to compare(); a key that
    is missing on one side is compared against None.

    Args:
        test        (dict): first object in comparison
        reference   (dict): second object in comparison
        ordered     (bool): whether to check order of list content
        diff_format (string): specify a format to display the difference in
        filter_key  (callable): passed on to compare()
        key_only    (bool): passed on to compare()

    Returns:
        list: the concatenated results of compare() for all keys
    """
    def value_or_none(mapping, name):
        # a key absent from one of the dictionaries counts as None
        try:
            return mapping[name]
        except KeyError:
            return None

    differences = []

    for name in set(test.keys()).union(reference.keys()):
        differences.extend(
            compare(
                obj1=value_or_none(test, name),
                obj2=value_or_none(reference, name),
                parent_key=name,
                ordered=ordered,
                diff_format=diff_format,
                filter_key=filter_key,
                key_only=key_only,
            )
        )

    return differences
279 
def meta_diff(
    files,
    verbose=False,
    ordered=False,
    drop=None,
    mode="lite",
    meta_key_filter=None,
    file_type=None,
    promote=False,
    diff_format="simple",
    regex=False,
    key_only=False,
    ignore_trigger=False,
):
    """
    Compare the in-file metadata in two given files. Uses PyUtils.MetaReader
    to obtain file content. Generates list of string that show difference.
    Returns empty list if no difference is found

    Keyword arguments:
    files   -- Names of two files to compare
    verbose -- toggle to get debug information
    ordered -- whether to check order of lists in the metadata
    drop    -- keys to drop from metadata retrieved by MetaReader
    mode    -- MetaReader argument setting amount of content (default 'lite').
               Allowed values are: tiny, lite, peeker, and full
    meta_key_filter -- MetaReader argument selecting keys to retrieve (default
                       get all)
    file_type       -- Type of files, POOL or BS (default: auto-configure)
    promote         -- MetaReader argument (default: False)
    diff_format     -- Return 'simple' or 'diff' style string (default: 'simple')
    regex           -- Use regex for the drop filter (default: False)
    key_only        -- Show only the keys instead of their value (default: False)
    ignore_trigger  -- Skip keys starting with one of the trigger_keys
                       imported from PyUtils.MetaReader (default: False)

    Raises ValueError unless exactly two files are given.
    """
    if len(files) != 2:
        raise ValueError("Wrong number of files passes, need two")

    reader_msg = logging.getLogger("MetaReader")
    reader_msg.setLevel(logging.INFO if verbose else logging.WARNING)

    msg = logging.getLogger("MetaDiff")
    msg.setLevel(logging.DEBUG if verbose else logging.INFO)

    msg.debug("Reading from %s and %s", files[0], files[1])

    metadata = read_metadata(
        files,
        file_type,
        mode=mode,
        meta_key_filter=meta_key_filter,
        promote=promote,
    )

    # Prepare the drop rules in a local list, so the list handed in by the
    # caller is not overwritten with compiled patterns.
    if drop is None:
        drop_rules = []
    elif regex:
        drop_rules = [re.compile(pattern) for pattern in drop]
    else:
        drop_rules = list(drop)

    def filter_key(key):
        """Return False for keys that are to be left out of the comparison"""
        for rule in drop_rules:
            dropped = rule.match(key) if regex else key.startswith(rule)
            if dropped:
                return False

        if ignore_trigger:
            for trigger_key in trigger_keys:
                if key.startswith(trigger_key):
                    return False

        return True

    result = compare_dicts(
        metadata[files[0]],
        metadata[files[1]],
        ordered=ordered,
        diff_format=diff_format,
        filter_key=filter_key,
        key_only=key_only
    )

    # Filtered keys show up as None entries.  They are removed before testing
    # for emptiness, so the message is also logged when all differences were
    # filtered out.
    differences = sorted(r for r in result if r is not None)

    if not differences:
        msg.info("No differences found")

    return differences
vtune_athena.format
format
Definition: vtune_athena.py:14
python.MetaDiff.compare
def compare(obj1, obj2, parent_key=None, ordered=False, diff_format="simple", filter_key=None, key_only=False)
Definition: MetaDiff.py:199
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:52
python.MetaDiff.print_diff_type
def print_diff_type(parent_key, obj1, obj2, diff_format, filter_key, key_only)
Definition: MetaDiff.py:103
intersection
std::vector< std::string > intersection(std::vector< std::string > &v1, std::vector< std::string > &v2)
Definition: compareFlatTrees.cxx:25
python.MetaDiff.compare_dicts
def compare_dicts(test, reference, ordered=False, diff_format="simple", filter_key=None, key_only=False)
Definition: MetaDiff.py:246
python.MetaDiff.print_diff
def print_diff(parent_key, obj1, obj2, diff_format, filter_key, key_only)
Definition: MetaDiff.py:55
python.MetaDiff.truncateDict
def truncateDict(value)
Definition: MetaDiff.py:42
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
python.MetaDiff.print_diff_dict_keys
def print_diff_dict_keys(parent_key, obj1, obj2, diff_format, filter_key, key_only)
Definition: MetaDiff.py:147
DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename T::value_type > sorted(T begin, T end)
Helper function to create a sorted vector from an unsorted one.
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:224
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
python.MetaDiff.meta_diff
def meta_diff(files, verbose=False, ordered=False, drop=None, mode="lite", meta_key_filter=None, file_type=None, promote=False, diff_format="simple", regex=False, key_only=False, ignore_trigger=False)
Definition: MetaDiff.py:280
str
Definition: BTagTrackIpAccessor.cxx:11
python.MetaDiff.summary
def summary(content)
Definition: MetaDiff.py:13