def _countDataHeaderEntries(root_file):
    """Return the number of DataHeader entries stored in an open ROOT file.

    The DataHeader is looked up first as a TTree and then as an RNTuple.

    Args:
        root_file: an open ``ROOT.TFile``.

    Returns:
        The entry count, or ``None`` when the file holds neither a DataHeader
        TTree nor a DataHeader RNTuple.

    Raises:
        RuntimeError: if the file lists an RNTuple DataHeader key but the
            running ROOT version integer is below 63100 (6.31/00).
    """
    header_tree = root_file.Get(PoolOpts.TTreeNames.DataHeader)
    if isinstance(header_tree, ROOT.TTree):
        return header_tree.GetEntriesFast()

    if (
        root_file.GetListOfKeys().Contains(PoolOpts.RNTupleNames.DataHeader)
        and ROOT.gROOT.GetVersionInt() < 63100
    ):
        raise RuntimeError(
            "ROOT ver. 6.31/01 or greater needed to read RNTuple files"
        )

    header_rnt = root_file.Get(PoolOpts.RNTupleNames.DataHeader)
    if isRNTuple(header_rnt):
        return ROOT.Experimental.RNTupleReader.Open(header_rnt).GetNEntries()
    return None


def validateInputMetadata(infile):
    """Run consistency checks on the metadata of one input file.

    Checks performed, in order:

    1. ``md[infile]["nentries"]``, the first ``numberOfEvents`` value found in
       a dict-valued metadata entry, and the DataHeader entry count must all
       be equal.
    2. ``/TagInfo`` ``project_name`` (when it is a list) must not contain both
       ``"IS_SIMULATION"`` and an item starting with ``"data"``.
    3. The ``project_name`` items starting with ``"data"`` must not yield more
       than one distinct period key.
    4. ``/TagInfo`` ``data_year`` (when it is a list) must not contain more
       than one distinct value.

    Args:
        infile: name of the input file; passed to ``ROOT.TFile`` and
            ``read_metadata`` and used as the key into the returned metadata.

    Returns:
        ``0`` when every check passes, ``1`` (after logging an error) on the
        first check that fails.

    Raises:
        RuntimeError: if the file holds an RNTuple DataHeader and the ROOT
            version is too old to read it.
    """
    logging.info(f"Using input file {infile}")

    current_file = ROOT.TFile(infile)
    try:
        md = read_metadata(infile, mode="full", unique_tag_info_values=False)

        # First "numberOfEvents" found among the dict-valued metadata entries;
        # None when no entry carries one.
        nevts_esi = next(
            (
                value["numberOfEvents"]
                for value in md[infile].values()
                if isinstance(value, dict) and "numberOfEvents" in value
            ),
            None,
        )

        nevts_dh = _countDataHeaderEntries(current_file)
    finally:
        # Release the file handle on every path, including exceptions.
        current_file.Close()

    if not (md[infile]["nentries"] == nevts_esi == nevts_dh):
        logging.error(
            "Number of events from EventStreamInfo inconsistent with number of entries in DataHeader"
        )
        return 1

    tag_info = md[infile]["/TagInfo"]

    project_names = tag_info.get("project_name") if "project_name" in tag_info else None
    if isinstance(project_names, list):
        data_projects = [item for item in project_names if item.startswith("data")]

        if "IS_SIMULATION" in project_names and data_projects:
            logging.error("/TagInfo contains values reserved for both MC and data")
            return 1

        # NOTE(review): item[5:6] is a single character (index 5) of each
        # "data..." project name; confirm this is the intended period key
        # rather than a wider slice.
        if len({item[5:6] for item in data_projects}) > 1:
            logging.error("/TagInfo contains values from different data taking periods")
            return 1

    if (
        "data_year" in tag_info
        and isinstance(tag_info["data_year"], list)
        and len(set(tag_info["data_year"])) > 1
    ):
        logging.error("/TagInfo contains values from different data taking periods")
        return 1

    return 0
128
129