ATLAS Offline Software
Loading...
Searching...
No Matches
CDIReader.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
6// CDIReader.cxx, (c) ATLAS Detector software
8
10
11#include <nlohmann/json.hpp>
12
13using json = nlohmann::json;
14
15Analysis::CDIReader::CDIReader(const std::string& cdipath, bool verbose) : m_use_json(false), m_cdipath(cdipath), m_CDIFile(TFile::Open(m_cdipath.c_str(), "READ"))
16{
17 TObjString* s;
18 m_CDIFile->GetObject("VersionInfo/BuildNumber", s);
19 if (s && verbose){
20 std::cout << " CDI file build number: " << s->GetName() << std::endl;
21 }
22 TList* taggerkeys = m_CDIFile->GetListOfKeys();
23 for (const auto tagger : *taggerkeys){
24 const char* taggername = tagger->GetName();
25 if(strcmp(taggername, "VersionInfo") != 0){
26 // now we have the top-level tagger name, we want to add this to our overall meta data
27 TDirectoryFile* taggerDir = (TDirectoryFile*)m_CDIFile->Get(taggername);
28 record_metadata(taggername, 0);
29 crawlCDI(taggerDir, 0, taggername);
30 }
31 }
32}
33
34
35void Analysis::CDIReader::crawlCDI(TDirectoryFile* parentDir, int depth, const std::string& metamap){
36
37 TList* keys = parentDir->GetListOfKeys(); // get directories
38 if(isWPdirectory(keys)){
39 // now that we are at the level of the individual label directories
40 // interact with them, and extract the data they store
41
42 TList* labelkeys = parentDir->GetListOfKeys();
43 Data theseData; // labels, systematics, DSIDS, etc.
44 Labels theseLabels;
45 std::set<std::string> DSID_set; // record all DSID names in the flavour configuration
46 std::set<std::string> systematics_set; // record all uncertainties met in flavour configuration
47
48 Labels path = split(metamap);
49 std::string taggername = path.at(0);
50 std::string jetcollname = path.at(1);
51 std::string workingpointname = path.at(2);
52
53 // For each "label" stored in the working point directory,
54 // we need to access the contents of the label directory
55 // and construct the metadata map
56 for(const auto label : *labelkeys){
57 std::string labelname = label->GetName();
58 if(labelname == "cutvalue" || labelname.find("fraction") != std::string::npos) continue;
59 m_labels.insert(labelname);
60 theseLabels.push_back(labelname);
61
62 // now enter the directory to access uncertainty info for this flavour
63 TDirectoryFile* flavourDir = (TDirectoryFile*)parentDir->Get(labelname.c_str());
64 if(flavourDir){
65 Labels uncertainties; // flavour specific uncertainties
66 TList* DSIDkeys = flavourDir->GetListOfKeys(); // this is the list of all the items in the flavour (DSID etc)
67 for(const auto CDHistCont : *DSIDkeys){
68 std::string DSIDname = CDHistCont->GetName();
69 DSID_set.insert(DSIDname);
70 m_DSIDs.insert(DSIDname);
71 if(DSIDname == "default_SF"){ // let's access the systematic uncertainties
72 // construct the total path
73 std::string dir = taggername + "/" + jetcollname + "/" + workingpointname + "/" + labelname + "/default_SF";
75 m_CDIFile->GetObject(dir.c_str(), cont);
76 if(!cont){
77 std::cout << "No default_SF CalibrationDataHistogramContainer?" << std::endl;
78 } else {
79 uncertainties = cont->listUncertainties();
80 for(const std::string &s : uncertainties){
81 systematics_set.insert(s);
82 }
83 }
84 // add the flavour specific uncertainties here
85 std::string flav_spec_unc_name = labelname + "_syst";
86 theseData[flav_spec_unc_name] = uncertainties;
87 }
88 }
89 } else {
90 std::cout << "No flavour directory?" << std::endl;
91 }
92 }
93 // sort and add the labels to the Data object
94 std::sort(theseLabels.begin(), theseLabels.end());
95 theseData["labels"] = std::move(theseLabels);
96 // convert DSID set to vector of strings
97 Labels theseDSIDs(DSID_set.size());
98 std::copy(DSID_set.begin(), DSID_set.end(), theseDSIDs.begin());
99 theseData["DSIDs"] = std::move(theseDSIDs);
100 // convert systematic set to vector of strings
101 Labels theseSystematics(systematics_set.size());
102 std::copy(systematics_set.begin(), systematics_set.end(), theseSystematics.begin());
103 theseData["systematics"] = std::move(theseSystematics);
104 // Construct this branch of the metadata map
105 // and record the Data object
106 record_metadata_map(theseData, metamap);
107 } else {
108 for(const auto coll: *keys){
109 std::string collname = coll->GetName();
110 if ( collname.find("cutvalue") != std::string::npos || collname.find("fraction") != std::string::npos) continue;
111 // track the metadata as you traverse
112 record_metadata(collname, depth+1);
113 TDirectoryFile* collDir = (TDirectoryFile*)parentDir->Get(collname.c_str());
114 if(collDir && collname != "VersionInfo"){
115 std::string nextmap = metamap + ";" + collname;
116 crawlCDI(collDir, depth+1, nextmap); // traverse further
117 } else {
118 std::cout << "No collection directory?" << std::endl;
119 }
120 }
121 }
122}
123
124
125
126
127void Analysis::CDIReader::printMetadata(int tagger, int jetcoll, int wpoint, int label){
128 /*
129 This method prints subsets of the available metadata collected from the CDI file.
130
131 The CDI data is organized in a hierarchical directory structure, where taggers contain jet-collections, and jet-collections
132 contain working points, etc.
133
134 This method prints wildcard data such as:
135 - tagger / *
136 - tagger / jetcoll/ *
137 - tagger / jetcoll/ wpoint / *
138 - tagger / jetcoll / wpoint / label
139
140 The method operates on simple integer inputs, to indicate what to print out:
141 - Positive integer values for tagger, jetcoll, wpoint, and label (up to the number of available items of each)
142 will print out information specific to that particular entry (e.g. tagger == 1 will print tagger #1 specific info)
143 - Negative integer values indicate a wildcard, and will print out all available items of this type.
144 - Zero indicates to print nothing for that category.
145 */
146 int current_tagger = (tagger < 0) ? -1 : 0;
147 int current_jetcoll = (jetcoll < 0) ? -1 : 0;
148 int current_wpoint = (wpoint < 0) ? -1 : 0;
149 int current_label = (label < 0) ? -1 : 0;
150
151 for (const auto& [tag, jets] : m_metadata){
152 if(current_tagger != -1) current_tagger += 1;
153 if(tagger != current_tagger || tagger == 0) continue;
154
155 std::cout << "| " << tag << std::endl; // print only the tagger you're interested in
156
157 for (const auto& [jet, wps] : jets){
158 if(current_jetcoll != -1) current_jetcoll += 1;
159 if(jetcoll != current_jetcoll || jetcoll == 0) continue;
160
161 std::cout << "|\\__ " << jet << std::endl;
162
163 int num_wps = wps.size();
164 int num_wp_seen = 0;
165 for(const auto& [wp, labels] : wps){
166 if(current_wpoint != -1) current_wpoint += 1;
167 if(wpoint != current_wpoint || wpoint == 0) continue;
168 num_wp_seen += 1;
169 if(num_wp_seen != num_wps){
170 std::cout << "| |\\__" << wp << std::endl;
171 } else {
172 std::cout << "| \\__" << wp << std::endl;
173 }
174
175 int label_index = 0;
176 Data d = labels;
177 for(const std::string& l : d["labels"]){
178 if(current_label != -1) current_label += 1;
179 if(label != current_label || label == 0) continue;
180 if(num_wp_seen != num_wps && label_index == 0){
181 std::cout << "| | \\___" << " (" << label_index << ") " << l << std::endl;
182 } else if(label_index != 0 && num_wp_seen != num_wps) {
183 std::cout << "| | \\___" << " (" << label_index << ") " << l << std::endl;
184 } else {
185 std::cout << "| \\___" << " (" << label_index << ") " << l << std::endl;
186 }
187 label_index += 1;
188 }
189 }
190 }
191 }
192}
193
194bool Analysis::CDIReader::checkConfig(const std::string& tagger, const std::string& jetcoll, const std::string& wp, bool verbose){
195 // this method checks if your config is correct or not
196 // returns true if correct, false if not
197 // if not correct, it will also print a helpful message
198 bool configured = false;
199 // get the number that would correspond to the index of the tagger/jetcoll/wp
200 // if these were each stored in a vector of strings, sorted alphanumerically
201 // which happens already by default in the (ordered) map
202 int tagger_ind = 0;
203 int jetcoll_ind = 0;
204 int wp_ind = 0;
205
206 if(m_metadata.count(tagger) > 0){
207 // get the tagger index
208 for(const auto& tag : m_metadata){
209 tagger_ind += 1;
210 if(tag.first == tagger) break;
211 }
212 if(m_metadata[tagger].count(jetcoll) > 0){
213 // get the jet collection index
214 for(const auto& jet : m_metadata[tagger]){
215 jetcoll_ind += 1;
216 if(jet.first == jetcoll) break;
217 }
218 if(m_metadata[tagger][jetcoll].count(wp) > 0){
219 // get the working point index
220 for(const auto& wpoint : m_metadata[tagger][jetcoll]){
221 wp_ind += 1;
222 if(wpoint.first == wp) break;
223 }
224 if (verbose) std::cout << " Your configuration looks good! Available labels are : " << std::endl;
225 if (verbose) printMetadata(tagger_ind, jetcoll_ind, wp_ind, -1);
226 // construct vector of labels
227 for(const std::string& flavour_label : m_metadata[tagger][jetcoll][wp]["labels"]){
228 m_label_vec.push_back(flavour_label);
229 }
230 // sort the vector of labels
231 std::sort(m_label_vec.begin(), m_label_vec.end());
232 configured = true;
233 } else {
234 if (verbose) std::cout << "Couldn't find \"" << wp << "\" for " << tagger << " / " << jetcoll << " in this CDI file!" << std::endl;
235 if (verbose) std::cout << "Here are your options :" << std::endl;
236 if (verbose) printMetadata(tagger_ind, jetcoll_ind, -1, 0);
237 }
238 } else {
239 if (verbose) std::cout << "Couldn't find \"" << jetcoll << "\" under " << tagger << " in this CDI file!" << std::endl;
240 if (verbose) std::cout << "Here are your options :" << std::endl;
241 if (verbose) printMetadata(tagger_ind, -1, 0, 0);
242 }
243 } else {
244 if (verbose) std::cout << "Couldn't find \"" << tagger << "\" in this CDI file" << std::endl;
245 if (verbose) std::cout << "Here are your options :" << std::endl;
246 if (verbose) printMetadata(-1,0,0,0);
247 }
248
249 if(m_use_json){
250 // let's make a json object from the nlohmann package and save it to file
251 json json_metadata(m_metadata);
252 // get cwd to save in
253 std::filesystem::path cwd = std::filesystem::current_path();
254 std::filesystem::path filepath = cwd / "CDI.json";
255 std::ofstream output(filepath);
256 output << std::setw(4) << json_metadata << std::endl;
257 }
258
259 m_initialized = configured;
260 return configured;
261}
262
263
264std::vector<std::string> Analysis::CDIReader::getDSIDs(const std::string& tagger, const std::string& jetcollection, const std::string& workingpoint){
265 if(!m_initialized){
266 std::cout << " CDIReader :: You need to validate your configuration before working with (flavour) labels!" << std::endl;
267 }
268 Labels DSIDs;
269 if (tagger.empty() || jetcollection.empty() || workingpoint.empty()){
270 // unless specified, return the vector of all known DSIDs from this CDI file
271 Labels DSID_vec(m_DSIDs.size());
272 std::copy(m_DSIDs.begin(), m_DSIDs.end(), DSID_vec.begin());
273 DSIDs = std::move(DSID_vec);
274 //} else if(std::find(m_taggers.begin(), m_taggers.end(), tagger) == m_taggers.end()){
275 } else if(m_taggers.find(tagger) == m_taggers.end()){
276 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
277 //} else if(std::find(m_jetcollections.begin(), m_jetcollections.end(), jetcollection) == m_jetcollections.end()){
278 } else if(m_jetcollections.find(jetcollection) == m_jetcollections.end()){
279 std::cout << " The jet collection [" << jetcollection << "] doesn't exist in " << tagger << " this CDI file!" << std::endl;
280 //} else if(std::find(m_workingpoints.begin(), m_workingpoints.end(), workingpoint) == m_workingpoints.end()){
281 } else if(m_workingpoints.find(workingpoint) == m_workingpoints.end()){
282 std::cout << " The working point [" << workingpoint << "] doesn't exist in " << tagger << "/" << jetcollection << " this CDI file!" << std::endl;
283 } else {
284 for (const std::string& DSID : m_metadata[tagger][jetcollection][workingpoint]["DSIDs"]){
285 DSIDs.push_back(DSID);
286 }
287 }
288 return DSIDs;
289}
290
291std::vector<std::string> Analysis::CDIReader::getLabels(const std::string& tagger, const std::string& jetcollection, const std::string& workingpoint){
292 if(!m_initialized){
293 std::cout << " CDIReader :: You need to validate your configuration before working with (flavour) labels!" << std::endl;
294 }
295 Labels labels;
296 if (tagger.empty() || jetcollection.empty() || workingpoint.empty()){
297 // unless specified, return the vector of all flavour labels
298 labels = m_label_vec;
299 } else if(m_taggers.find(tagger) == m_taggers.end()){
300 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
301 } else if(m_jetcollections.find(jetcollection) == m_jetcollections.end()){
302 std::cout << " The jet collection [" << jetcollection << "] doesn't exist in " << tagger << " this CDI file!" << std::endl;
303 } else if(m_workingpoints.find(workingpoint) == m_workingpoints.end()){
304 std::cout << " The working point [" << workingpoint << "] doesn't exist in " << tagger << "/" << jetcollection << " this CDI file!" << std::endl;
305 } else {
306 for (const std::string& label : m_metadata[tagger][jetcollection][workingpoint]["labels"]){
307 labels.push_back(label);
308 }
309 }
310 return labels;
311}
312
313std::vector<std::string> Analysis::CDIReader::getJetCollections(const std::string& tagger){
314 Labels jetcolls;
315 if (tagger.empty()){
316 // unless specified, return the vector of all jet collections
317 jetcolls.assign(m_jetcollections.begin(), m_jetcollections.end());
318 } else if (m_taggers.find(tagger) == m_taggers.end()) {
319 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
320 } else {
321 // return the jetcollections of this tagger
322 for(const auto& jet : m_metadata[tagger]){
323 jetcolls.push_back(jet.first);
324 }
325 }
326 return jetcolls;
327}
328
329
330std::vector<std::string> Analysis::CDIReader::getWorkingPoints(const std::string& tagger, const std::string& jetcollection){
331 Labels wps;
332 if (tagger.empty() || jetcollection.empty()){
333 // unless specified, return the vector of all working points
334 wps.assign(m_workingpoints.begin(), m_workingpoints.end());
335 } else if(m_taggers.find(tagger) == m_taggers.end()){
336 std::cout << " The tagger [" << tagger << "] doesn't exist in this CDI file!" << std::endl;
337 } else if(m_jetcollections.find(jetcollection) == m_jetcollections.end()){
338 std::cout << " The jet collection [" << jetcollection << "] doesn't exist in " << tagger << " this CDI file!" << std::endl;
339 } else {
340 for(const auto& wp : m_metadata[tagger][jetcollection]){
341 wps.push_back(wp.first);
342 }
343 }
344 return wps;
345}
346
347std::vector<std::string> Analysis::CDIReader::getTaggers(){
348 Labels taggers;
349 taggers.assign(m_taggers.begin(), m_taggers.end());
350 return taggers;
351}
nlohmann::json json
void record_metadata(const std::string &datum, int depth=0)
Definition CDIReader.h:113
Labels getDSIDs(const std::string &tagger="", const std::string &jetcollection="", const std::string &workingpoint="")
Labels getLabels(const std::string &tagger="", const std::string &jetcollection="", const std::string &workingpoint="")
std::unique_ptr< TFile > m_CDIFile
Definition CDIReader.h:104
std::set< std::string > m_taggers
Definition CDIReader.h:105
std::set< std::string > m_DSIDs
Definition CDIReader.h:109
void record_metadata_map(const Data &data, const std::string &path)
Definition CDIReader.h:132
Labels getWorkingPoints(const std::string &tagger, const std::string &jetcollection)
bool checkConfig(const std::string &tagger, const std::string &jetcoll, const std::string &wp, bool verbose=false)
bool isWPdirectory(TList *list)
Definition CDIReader.h:143
std::set< std::string > m_workingpoints
Definition CDIReader.h:107
Labels getJetCollections(const std::string &tagger)
std::set< std::string > m_jetcollections
Definition CDIReader.h:106
CDIReader(const std::string &cdipath, bool verbose=false)
normal constructor.
Definition CDIReader.cxx:15
std::string m_cdipath
Definition CDIReader.h:103
std::set< std::string > m_labels
Definition CDIReader.h:108
std::map< std::string, Labels > Data
Definition CDIReader.h:42
void printMetadata(int tagger=-1, int jetcoll=-1, int wpoint=-1, int label=-1)
std::vector< std::string > Labels
Definition CDIReader.h:41
bool m_initialized
flag whether the initialization has been carried out
Definition CDIReader.h:100
void crawlCDI(TDirectoryFile *parentDir, int depth=0, const std::string &metamap="")
Definition CDIReader.cxx:35
std::vector< std::string > listUncertainties() const
retrieve the list of "uncertainties" accessible to this object.
This is the class holding information for histogram-based calibration results.
std::string depth
tag string for intendation
Definition fastadd.cxx:46
int count(std::string s, const std::string &regx)
count how many occurances of a regx are in a string
Definition hcg.cxx:146
bool verbose
Definition hcg.cxx:73
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
std::string label(const std::string &format, int i)
Definition label.h:19
std::string cwd
Definition listroot.cxx:38
void sort(typename DataModel_detail::iterator< DVL > beg, typename DataModel_detail::iterator< DVL > end)
Specialization of sort for DataVector/List.