ATLAS Offline Software
Loading...
Searching...
No Matches
CDIReader.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
3*/
4
// CDIReader.cxx, (c) ATLAS Detector software
8
10
11#include <nlohmann/json.hpp>
12
13using json = nlohmann::json;
14
15Analysis::CDIReader::CDIReader(const std::string& cdipath, bool verbose) : m_use_json(false), m_cdipath(cdipath), m_CDIFile(TFile::Open(m_cdipath.c_str(), "READ"))
16{
17 if (!m_CDIFile)[[unlikely]]{
18 std::cout << "Analysis::CDIReader::CDIReader: " << m_cdipath << " could not be opened."<<std::endl;
19 return;
20 }
21 TObjString* s{};
22 m_CDIFile->GetObject("VersionInfo/BuildNumber", s);
23 if (s && verbose){
24 std::cout << " CDI file build number: " << s->GetName() << std::endl;
25 }
26 TList* taggerkeys = m_CDIFile->GetListOfKeys();
27 for (const auto tagger : *taggerkeys){
28 if (!tagger)[[unlikely]] continue;
29 const std::string taggername{tagger->GetName()};
30 if(taggername != "VersionInfo"){
31 // now we have the top-level tagger name, we want to add this to our overall meta data
32 auto* taggerDir = static_cast<TDirectoryFile*>(m_CDIFile->Get(taggername.c_str()));
33 record_metadata(taggername, 0);
34 crawlCDI(taggerDir, 0, taggername);
35 }
36 }
37}
38
39
40void Analysis::CDIReader::crawlCDI(TDirectoryFile* parentDir, int depth, const std::string& metamap){
41
42 TList* keys = parentDir->GetListOfKeys(); // get directories
43 if(isWPdirectory(keys)){
44 // now that we are at the level of the individual label directories
45 // interact with them, and extract the data they store
46
47 TList* labelkeys = parentDir->GetListOfKeys();
48 Data theseData; // labels, systematics, DSIDS, etc.
49 Labels theseLabels;
50 std::set<std::string> DSID_set; // record all DSID names in the flavour configuration
51 std::set<std::string> systematics_set; // record all uncertainties met in flavour configuration
52
53 Labels path = split(metamap);
54 std::string taggername = path.at(0);
55 std::string jetcollname = path.at(1);
56 std::string workingpointname = path.at(2);
57
58 // For each "label" stored in the working point directory,
59 // we need to access the contents of the label directory
60 // and construct the metadata map
61 for(const auto label : *labelkeys){
62 std::string labelname = label->GetName();
63 if(labelname == "cutvalue" || labelname.find("fraction") != std::string::npos) continue;
64 m_labels.insert(labelname);
65 theseLabels.push_back(labelname);
66
67 // now enter the directory to access uncertainty info for this flavour
68 TDirectoryFile* flavourDir = (TDirectoryFile*)parentDir->Get(labelname.c_str());
69 if(flavourDir){
70 Labels uncertainties; // flavour specific uncertainties
71 TList* DSIDkeys = flavourDir->GetListOfKeys(); // this is the list of all the items in the flavour (DSID etc)
72 for(const auto CDHistCont : *DSIDkeys){
73 std::string DSIDname = CDHistCont->GetName();
74 DSID_set.insert(DSIDname);
75 m_DSIDs.insert(DSIDname);
76 if(DSIDname == "default_SF"){ // let's access the systematic uncertainties
77 // construct the total path
78 std::string dir = taggername + "/" + jetcollname + "/" + workingpointname + "/" + labelname + "/default_SF";
80 m_CDIFile->GetObject(dir.c_str(), cont);
81 if(!cont){
82 std::cout << "No default_SF CalibrationDataHistogramContainer?" << std::endl;
83 } else {
84 uncertainties = cont->listUncertainties();
85 for(const std::string &s : uncertainties){
86 systematics_set.insert(s);
87 }
88 }
89 // add the flavour specific uncertainties here
90 std::string flav_spec_unc_name = labelname + "_syst";
91 theseData[flav_spec_unc_name] = uncertainties;
92 }
93 }
94 } else {
95 std::cout << "No flavour directory?" << std::endl;
96 }
97 }
98 // sort and add the labels to the Data object
99 std::sort(theseLabels.begin(), theseLabels.end());
100 theseData["labels"] = std::move(theseLabels);
101 // convert DSID set to vector of strings
102 Labels theseDSIDs(DSID_set.size());
103 std::copy(DSID_set.begin(), DSID_set.end(), theseDSIDs.begin());
104 theseData["DSIDs"] = std::move(theseDSIDs);
105 // convert systematic set to vector of strings
106 Labels theseSystematics(systematics_set.size());
107 std::copy(systematics_set.begin(), systematics_set.end(), theseSystematics.begin());
108 theseData["systematics"] = std::move(theseSystematics);
109 // Construct this branch of the metadata map
110 // and record the Data object
111 record_metadata_map(theseData, metamap);
112 } else {
113 for(const auto coll: *keys){
114 std::string collname = coll->GetName();
115 if ( collname.find("cutvalue") != std::string::npos || collname.find("fraction") != std::string::npos) continue;
116 // track the metadata as you traverse
117 record_metadata(collname, depth+1);
118 TDirectoryFile* collDir = (TDirectoryFile*)parentDir->Get(collname.c_str());
119 if(collDir && collname != "VersionInfo"){
120 std::string nextmap = metamap + ";" + collname;
121 crawlCDI(collDir, depth+1, nextmap); // traverse further
122 } else {
123 std::cout << "No collection directory?" << std::endl;
124 }
125 }
126 }
127}
128
129
130
131
132void Analysis::CDIReader::printMetadata(int tagger, int jetcoll, int wpoint, int label){
133 /*
134 This method prints subsets of the available metadata collected from the CDI file.
135
136 The CDI data is organized in a hierarchical directory structure, where taggers contain jet-collections, and jet-collections
137 contain working points, etc.
138
139 This method prints wildcard data such as:
140 - tagger / *
141 - tagger / jetcoll/ *
142 - tagger / jetcoll/ wpoint / *
143 - tagger / jetcoll / wpoint / label
144
145 The method operates on simple integer inputs, to indicate what to print out:
146 - Positive integer values for tagger, jetcoll, wpoint, and label (up to the number of available items of each)
147 will print out information specific to that particular entry (e.g. tagger == 1 will print tagger #1 specific info)
148 - Negative integer values indicate a wildcard, and will print out all available items of this type.
149 - Zero indicates to print nothing for that category.
150 */
151 int current_tagger = (tagger < 0) ? -1 : 0;
152 int current_jetcoll = (jetcoll < 0) ? -1 : 0;
153 int current_wpoint = (wpoint < 0) ? -1 : 0;
154 int current_label = (label < 0) ? -1 : 0;
155
156 for (const auto& [tag, jets] : m_metadata){
157 if(current_tagger != -1) current_tagger += 1;
158 if(tagger != current_tagger || tagger == 0) continue;
159
160 std::cout << "| " << tag << std::endl; // print only the tagger you're interested in
161
162 for (const auto& [jet, wps] : jets){
163 if(current_jetcoll != -1) current_jetcoll += 1;
164 if(jetcoll != current_jetcoll || jetcoll == 0) continue;
165
166 std::cout << "|\\__ " << jet << std::endl;
167
168 int num_wps = wps.size();
169 int num_wp_seen = 0;
170 for(const auto& [wp, labels] : wps){
171 if(current_wpoint != -1) current_wpoint += 1;
172 if(wpoint != current_wpoint || wpoint == 0) continue;
173 num_wp_seen += 1;
174 if(num_wp_seen != num_wps){
175 std::cout << "| |\\__" << wp << std::endl;
176 } else {
177 std::cout << "| \\__" << wp << std::endl;
178 }
179
180 int label_index = 0;
181 Data d = labels;
182 for(const std::string& l : d["labels"]){
183 if(current_label != -1) current_label += 1;
184 if(label != current_label || label == 0) continue;
185 if(num_wp_seen != num_wps && label_index == 0){
186 std::cout << "| | \\___" << " (" << label_index << ") " << l << std::endl;
187 } else if(label_index != 0 && num_wp_seen != num_wps) {
188 std::cout << "| | \\___" << " (" << label_index << ") " << l << std::endl;
189 } else {
190 std::cout << "| \\___" << " (" << label_index << ") " << l << std::endl;
191 }
192 label_index += 1;
193 }
194 }
195 }
196 }
197}
198
199bool Analysis::CDIReader::checkConfig(const std::string& tagger, const std::string& jetcoll, const std::string& wp, bool verbose){
200 // this method checks if your config is correct or not
201 // returns true if correct, false if not
202 // if not correct, it will also print a helpful message
203 bool configured = false;
204 // get the number that would correspond to the index of the tagger/jetcoll/wp
205 // if these were each stored in a vector of strings, sorted alphanumerically
206 // which happens already by default in the (ordered) map
207 int tagger_ind = 0;
208 int jetcoll_ind = 0;
209 int wp_ind = 0;
210
211 if(m_metadata.count(tagger) > 0){
212 // get the tagger index
213 for(const auto& tag : m_metadata){
214 tagger_ind += 1;
215 if(tag.first == tagger) break;
216 }
217 if(m_metadata[tagger].count(jetcoll) > 0){
218 // get the jet collection index
219 for(const auto& jet : m_metadata[tagger]){
220 jetcoll_ind += 1;
221 if(jet.first == jetcoll) break;
222 }
223 if(m_metadata[tagger][jetcoll].count(wp) > 0){
224 // get the working point index
225 for(const auto& wpoint : m_metadata[tagger][jetcoll]){
226 wp_ind += 1;
227 if(wpoint.first == wp) break;
228 }
229 if (verbose) std::cout << " Your configuration looks good! Available labels are : " << std::endl;
230 if (verbose) printMetadata(tagger_ind, jetcoll_ind, wp_ind, -1);
231 // construct vector of labels
232 for(const std::string& flavour_label : m_metadata[tagger][jetcoll][wp]["labels"]){
233 m_label_vec.push_back(flavour_label);
234 }
235 // sort the vector of labels
236 std::sort(m_label_vec.begin(), m_label_vec.end());
237 configured = true;
238 } else {
239 if (verbose) std::cout << "Couldn't find \"" << wp << "\" for " << tagger << " / " << jetcoll << " in this CDI file!" << std::endl;
240 if (verbose) std::cout << "Here are your options :" << std::endl;
241 if (verbose) printMetadata(tagger_ind, jetcoll_ind, -1, 0);
242 }
243 } else {
244 if (verbose) std::cout << "Couldn't find \"" << jetcoll << "\" under " << tagger << " in this CDI file!" << std::endl;
245 if (verbose) std::cout << "Here are your options :" << std::endl;
246 if (verbose) printMetadata(tagger_ind, -1, 0, 0);
247 }
248 } else {
249 if (verbose) std::cout << "Couldn't find \"" << tagger << "\" in this CDI file" << std::endl;
250 if (verbose) std::cout << "Here are your options :" << std::endl;
251 if (verbose) printMetadata(-1,0,0,0);
252 }
253
254 if(m_use_json){
255 // let's make a json object from the nlohmann package and save it to file
256 json json_metadata(m_metadata);
257 // get cwd to save in
258 std::filesystem::path cwd = std::filesystem::current_path();
259 std::filesystem::path filepath = cwd / "CDI.json";
260 std::ofstream output(filepath);
261 output << std::setw(4) << json_metadata << std::endl;
262 }
263
264 m_initialized = configured;
265 return configured;
266}
267
268
269std::vector<std::string> Analysis::CDIReader::getDSIDs(const std::string& tagger, const std::string& jetcollection, const std::string& workingpoint){
270 if(!m_initialized){
271 std::cout << " CDIReader :: You need to validate your configuration before working with (flavour) labels!" << std::endl;
272 }
273 Labels DSIDs;
274 if (tagger.empty() || jetcollection.empty() || workingpoint.empty()){
275 // unless specified, return the vector of all known DSIDs from this CDI file
276 Labels DSID_vec(m_DSIDs.size());
277 std::copy(m_DSIDs.begin(), m_DSIDs.end(), DSID_vec.begin());
278 DSIDs = std::move(DSID_vec);
279 //} else if(std::find(m_taggers.begin(), m_taggers.end(), tagger) == m_taggers.end()){
280 } else if(m_taggers.find(tagger) == m_taggers.end()){
281 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
282 //} else if(std::find(m_jetcollections.begin(), m_jetcollections.end(), jetcollection) == m_jetcollections.end()){
283 } else if(m_jetcollections.find(jetcollection) == m_jetcollections.end()){
284 std::cout << " The jet collection [" << jetcollection << "] doesn't exist in " << tagger << " this CDI file!" << std::endl;
285 //} else if(std::find(m_workingpoints.begin(), m_workingpoints.end(), workingpoint) == m_workingpoints.end()){
286 } else if(m_workingpoints.find(workingpoint) == m_workingpoints.end()){
287 std::cout << " The working point [" << workingpoint << "] doesn't exist in " << tagger << "/" << jetcollection << " this CDI file!" << std::endl;
288 } else {
289 for (const std::string& DSID : m_metadata[tagger][jetcollection][workingpoint]["DSIDs"]){
290 DSIDs.push_back(DSID);
291 }
292 }
293 return DSIDs;
294}
295
296std::vector<std::string> Analysis::CDIReader::getLabels(const std::string& tagger, const std::string& jetcollection, const std::string& workingpoint){
297 if(!m_initialized){
298 std::cout << " CDIReader :: You need to validate your configuration before working with (flavour) labels!" << std::endl;
299 }
300 Labels labels;
301 if (tagger.empty() || jetcollection.empty() || workingpoint.empty()){
302 // unless specified, return the vector of all flavour labels
303 labels = m_label_vec;
304 } else if(m_taggers.find(tagger) == m_taggers.end()){
305 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
306 } else if(m_jetcollections.find(jetcollection) == m_jetcollections.end()){
307 std::cout << " The jet collection [" << jetcollection << "] doesn't exist in " << tagger << " this CDI file!" << std::endl;
308 } else if(m_workingpoints.find(workingpoint) == m_workingpoints.end()){
309 std::cout << " The working point [" << workingpoint << "] doesn't exist in " << tagger << "/" << jetcollection << " this CDI file!" << std::endl;
310 } else {
311 for (const std::string& label : m_metadata[tagger][jetcollection][workingpoint]["labels"]){
312 labels.push_back(label);
313 }
314 }
315 return labels;
316}
317
318std::vector<std::string> Analysis::CDIReader::getJetCollections(const std::string& tagger){
319 Labels jetcolls;
320 if (tagger.empty()){
321 // unless specified, return the vector of all jet collections
322 jetcolls.assign(m_jetcollections.begin(), m_jetcollections.end());
323 } else if (m_taggers.find(tagger) == m_taggers.end()) {
324 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
325 } else {
326 // return the jetcollections of this tagger
327 for(const auto& jet : m_metadata[tagger]){
328 jetcolls.push_back(jet.first);
329 }
330 }
331 return jetcolls;
332}
333
334
335std::vector<std::string> Analysis::CDIReader::getWorkingPoints(const std::string& tagger, const std::string& jetcollection){
336 Labels wps;
337 if (tagger.empty() || jetcollection.empty()){
338 // unless specified, return the vector of all working points
339 wps.assign(m_workingpoints.begin(), m_workingpoints.end());
340 } else if(m_taggers.find(tagger) == m_taggers.end()){
341 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
342 } else if(m_jetcollections.find(jetcollection) == m_jetcollections.end()){
343 std::cout << " The jet collection [" << jetcollection << "] doesn't exist in " << tagger << " this CDI file!" << std::endl;
344 } else {
345 for(const auto& wp : m_metadata[tagger][jetcollection]){
346 wps.push_back(wp.first);
347 }
348 }
349 return wps;
350}
351
352std::vector<std::string> Analysis::CDIReader::getTaggers(){
353 Labels taggers;
354 taggers.assign(m_taggers.begin(), m_taggers.end());
355 return taggers;
356}
nlohmann::json json
void record_metadata(const std::string &datum, int depth=0)
Definition CDIReader.h:113
Labels getDSIDs(const std::string &tagger="", const std::string &jetcollection="", const std::string &workingpoint="")
Labels getLabels(const std::string &tagger="", const std::string &jetcollection="", const std::string &workingpoint="")
std::unique_ptr< TFile > m_CDIFile
Definition CDIReader.h:104
std::set< std::string > m_taggers
Definition CDIReader.h:105
std::set< std::string > m_DSIDs
Definition CDIReader.h:109
void record_metadata_map(const Data &data, const std::string &path)
Definition CDIReader.h:132
Labels getWorkingPoints(const std::string &tagger, const std::string &jetcollection)
bool checkConfig(const std::string &tagger, const std::string &jetcoll, const std::string &wp, bool verbose=false)
bool isWPdirectory(TList *list)
Definition CDIReader.h:143
std::set< std::string > m_workingpoints
Definition CDIReader.h:107
Labels getJetCollections(const std::string &tagger)
std::set< std::string > m_jetcollections
Definition CDIReader.h:106
CDIReader(const std::string &cdipath, bool verbose=false)
normal constructor.
Definition CDIReader.cxx:15
std::string m_cdipath
Definition CDIReader.h:103
std::set< std::string > m_labels
Definition CDIReader.h:108
std::map< std::string, Labels > Data
Definition CDIReader.h:42
void printMetadata(int tagger=-1, int jetcoll=-1, int wpoint=-1, int label=-1)
std::vector< std::string > Labels
Definition CDIReader.h:41
bool m_initialized
flag whether the initialization has been carried out
Definition CDIReader.h:100
void crawlCDI(TDirectoryFile *parentDir, int depth=0, const std::string &metamap="")
Definition CDIReader.cxx:40
std::vector< std::string > listUncertainties() const
retrieve the list of "uncertainties" accessible to this object.
This is the class holding information for histogram-based calibration results.
std::string depth
tag string for intendation
Definition fastadd.cxx:46
int count(std::string s, const std::string &regx)
count how many occurances of a regx are in a string
Definition hcg.cxx:148
bool verbose
Definition hcg.cxx:75
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179
std::string label(const std::string &format, int i)
Definition label.h:19
std::string cwd
Definition listroot.cxx:38
void sort(typename DataModel_detail::iterator< DVL > beg, typename DataModel_detail::iterator< DVL > end)
Specialization of sort for DataVector/List.
#define unlikely(x)