ATLAS Offline Software
HanOutputFile_translation.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
3  */
4 
5 #include <iostream> //input-output
6 #include <nlohmann/json.hpp> //If you run this code as part of Athena
7 // Root libraries
8 #include <TFile.h>
9 #include <TFolder.h>
10 #include <TH1.h>
11 #include <TKey.h>
12 #include <TObjString.h>
13 #include <TROOT.h> //To use gRoot
14 #include <unistd.h> //To use usleep function
15 
16 #include <chrono> //To measure time
17 #include <cstring> //to convert string to char array (to save json file as TObjString)
18 
35 // Function, that checks, if the directory includes histogram in it or in any subdirectories
36 
37 int include_hist(TObject* obj);
38 
39 // Function, that defines the number of objects inside a directory
41 
42 // Function, that converts sequense, containing TDirectories and strings to JSON file
43 nlohmann::ordered_json to_JSON(TObject* obj);
44 
45 // What to do if dir has no hist
46 int work_with_no_hist_dir(TObject* obj, TObject* destination_to_save);
47 
48 // Results directory usually does not include any hists. In such a way, after conversion it usually represents JSON
49 // string
50 // However,there are some cases, when it includes histogram "Reference". In this case it will be saved as Tdirectory.
51 // The conversion should be done in such a way, that the type of "Results" object should be the same all the time.
52 // The simpliest way is to store hist in the higher level and store the "Results" as JSON all the time.
53 // The higher level is a hist_ TDirectory. However this directory also not always will contains Reference. In this
54 // regards, the histo_ directory will always be Tdirectory кegardless of whether it contains a link or not.
55 int work_with_results_dir(TObject* obj_in, TObject* obj_to);
56 
57 // Converts from old format to new format
58 int convert_file(TObject* obj_in, TObject* obj_to);
59 
60 int main(int argc, char* argv[]) {
61  using namespace std;
62  // ATLAS Data Quality space
63  TString input_file_path = "/eos/atlas/atlascerngroupdisk/data-dqm/examples/han_output_translation_example/";
64  TString input_file_name = "run_364030_lowStat_LB121-140_han.root"; // Example file
65  TString output_file_path = "./";
66 
67  if (argc == 2) {
68  input_file_name = argv[1];
69  }
70  if (argc == 3) {
71  input_file_name = argv[1];
72  output_file_path = argv[2];
73  }
74  if (argc == 4) {
75  input_file_path = argv[1];
76  input_file_name = argv[2];
77  output_file_path = argv[3];
78  }
79 
80  auto start = std::chrono::system_clock::now(); // Start time counting
81  // Open file from which to convert
82  TFile* f_input = new TFile(input_file_path + input_file_name);
83  // Open file where to convert
84  // Enter here the path where you want to store the iutput file
85  TFile* f_output = new TFile(output_file_path + input_file_name, "recreate");
86  // Counting time elapsed for the translation
87  auto end = std::chrono::system_clock::now(); // Stop time counting
88  std::chrono::duration<double> elapsed_seconds = end - start;
89  std::cout << "elapsed time to open files: " << elapsed_seconds.count() << "s\n";
90  start = std::chrono::system_clock::now(); // Start time counting
91 
92  // Convert from V1-->V2.3
93  convert_file(f_input, f_output);
94 
95  // Add the version flag
96 
97  f_output->cd("HanMetadata_");
98  TDirectory* version_dir = gDirectory->mkdir("File");
99  version_dir->cd();
100  TDirectory* version_sub_dir = gDirectory->mkdir("Version_name");
101  version_sub_dir->cd();
102  TObjString file_version;
103  file_version.Write("V.2.3");
104 
105  end = std::chrono::system_clock::now(); // Stop time counting
106  elapsed_seconds = end - start;
107  std::cout << "elapsed time for algorythm implementation: " << elapsed_seconds.count() << "s\n";
108  // From root forum
109  // To reduce the time spent in the ‘garbage collection’ and assuming that you are 100% sure
110  // that no pointer to the contained object is shared and assuming you explicitly delete the
111  // TFile object, you can remove the TFile object from the list of files
112  // “gROOT->GetListOfFiles()->Remove(myfile);”
113 
114  // This is a known problem with file with very large number of histograms in a file
115  //(and in particular in a directory). You can work around the problem, if (and only if)
116  // you are sure the same histogram is not shared between two directories:
117  // TFile *outputFile = ..... ..... gROOT->GetListOfFiles()->Remove(outputFile); delete outputFile;
118 
119  // start = std::chrono::system_clock::now(); // Start time counting
120  gROOT->GetListOfFiles()->Remove(f_input); // removes the file from a list of “files to cleanup”, so that the objects
121  // they contain are not cleaned at the end.
122  gROOT->GetListOfFiles()->Remove(f_output);
123  f_output->Close();
124  f_input->Close(); // Close the file
125  delete f_input;
126  delete f_output;
127  end = std::chrono::system_clock::now(); // Stop time counting
128  elapsed_seconds = end - start;
129  std::cout << "elapsed time to close files: " << elapsed_seconds.count() << "s\n";
130  return 0;
131 }
132 
133 //---------Functions description--------------
134 int convert_file(TObject* obj_input, TObject* obj_outout) {
135  TString obj_input_type = obj_input->ClassName();
136  bool is_file;
137 
138  if (obj_input_type == "TFile") {
139  is_file = true;
140  } else {
141  is_file = false;
142  }
143 
144  TDirectory* dir;
145  TDirectory* save_to = dynamic_cast<TDirectory*>(obj_outout);
146  TString name = obj_input->GetName();
147 
148  if (dir = dynamic_cast<TDirectory*>(obj_input)) { // obj_input is a TDirectory
149  TString name = dir->GetName();
150  if (name == "Results") { // From Results folder should be extracted Reference histogrma (in an upper level), and
151  // Results --> JSON
152  work_with_results_dir(dir, save_to);
153  } else if (name == "Config") { // Config --> JSON
154  work_with_no_hist_dir(dir, save_to);
155  } else {
156  // Treat as TDirectory
157  // 1-st, lets create this dir
158  TDirectory* copy_dir;
159  if (is_file == false) {
160  copy_dir = save_to->mkdir(name);
161  } else {
162  copy_dir = save_to;
163  }
164  // All files in this dir should be saved in the created one
165  copy_dir->cd();
166  // 2-nd, create iterator to iterate through the objects in the original dir
167  TIter next(dir->GetListOfKeys());
168  TKey* key;
169  // 3-rd, Analyse, from which elements this dir consists of
170  while ((key = (TKey*) next())) {
171  TObject* next_level_obj;
172  TString key_name = key->GetName();
173  next_level_obj = dir->GetKey(key_name)->ReadObj();
174  convert_file(next_level_obj, copy_dir);
175  }
176  }
177  } else { // If the object is not a Tdirectory (a histogram or TObjsStrin)
178  // just save it as it is where it should be
179  save_to->cd();
180  obj_input->Write(name);
181  }
182  return 1;
183 }
184 
185 int work_with_results_dir(TObject* obj_input, TObject* obj_outout) {
186  TDirectory* dir = (TDirectory*) obj_input;
187  TDirectory* save_to = (TDirectory*) obj_outout;
188 
189  if (include_hist(dir)) {
190  TKey* key;
191  TString key_type;
192  TString key_name;
193  TIter next(dir->GetListOfKeys());
194  while ((key = (TKey*) next())) {
195  TObject* obj_inside;
196  key_name = key->GetName();
197  obj_inside = dir->GetKey(key_name)->ReadObj();
198  key_type = obj_inside->ClassName();
199  // If an element in "Results" directory is a hist, we will save it in higher level
200  if (key_type == "TH1I" || key_type == "TH2I" || key_type == "TH1F" || key_type == "TH2F" ||
201  key_type == "TProfile2D" || key_type == "TProfile" || key_type == "TGraphAsymmErrors" ||
202  key_type == "TGraphErrors" || key_type == "TH1D" || key_type == "TH2S") {
203  save_to->cd();
204  obj_inside->Write(key_name);
205  }
206  }
207  // Then we will work with this directory as with ones without hists
208  work_with_no_hist_dir(obj_input, obj_outout);
209  }
210  // If "Results" has no histograms inside, we will work with it as with the usual directory
211  else {
212  work_with_no_hist_dir(obj_input, obj_outout);
213  }
214  return 1;
215 }
216 
217 int work_with_no_hist_dir(TObject* obj, TObject* destination_to_save) {
218  TDirectory* save_place = (TDirectory*) destination_to_save;
219 
220  // If directory has no hists in it, we convert it to JSON
221  // histogram_ Tdirectory is an exception. It will always be a Tdirectory
222  save_place->cd();
223  nlohmann::ordered_json j = to_JSON(obj);
224  // Then, save JSON to file as TObjString
225  // Convert json to string
226  std::string string = j.dump(4);
227  // Write JSON to rootFile
228 
229  TObjString string_to_tfile;
230  // string_to_tfile.SetString(cstr);//Original
231  if (j.is_null()) {
232  string_to_tfile.SetString("{}"); // Content of a JSON string
233  } else {
234  string_to_tfile.SetString(string.data()); // Content of a JSON string
235  }
236  TString key_name = obj->GetName();
237  string_to_tfile.Write(key_name);
238  return 0;
239 }
240 
241 int include_hist(TObject* obj) {
242  using namespace std;
243 
244  TDirectory* dir = (TDirectory*) obj;
245  TKey* key;
246  TString key_type;
247  TString key_name;
248  // Look, what the directory stores
249  dir->cd();
250  TIter next(dir->GetListOfKeys());
251  while ((key = (TKey*) next())) {
252  TObject* obj_inside;
253  key_name = key->GetName();
254  obj_inside =
255  dir->GetKey(key_name)->ReadObj(); // Get the object. This procedure is better, since it is able to read
256  // names with "/"
257  key_type = obj_inside->ClassName();
258  // If the object is histogram
259  if (key_type == "TH1I" || key_type == "TH2I" || key_type == "TH1F" || key_type == "TH2F" ||
260  key_type == "TProfile2D" || key_type == "TProfile" || key_type == "TGraphAsymmErrors" ||
261  key_type == "TGraphErrors" || key_type == "TH1D" || key_type == "TH2S") {
262  return 1;
263  }
264  // Also check all the subdirectories
265  if (key_type == "TDirectoryFile") {
266  if (include_hist(obj_inside) == 1) {
267  return 1;
268  }
269  }
270  }
271  return 0;
272 }
273 
275  const TCollection* next_coll = next.GetCollection();
276  Int_t level_size = next_coll->Capacity();
277 
278  return level_size;
279 }
280 
281 nlohmann::ordered_json to_JSON(TObject* obj) {
282  using json = nlohmann::ordered_json;
283  TString obj_type = obj->ClassName();
284  json j;
285 
286  if (obj_type == "TObjString") { // If the object type, that were passed to this function is TObjString (should be
287  // impossible), this means, that there is a TObjString, that is not a single file in
288  // a
289  // directory (not a usual case)
290  std::cout << "WARNING: Strange case: TObjString is not a single object in a dir" << std::endl;
291  } else if (obj_type != "TDirectoryFile" && obj_type != "TFile") { // No other type than TDirectory or TFile should be
292  // passed to this function normally
293  std::cout << "WARNING: Strange type: " << obj_type << std::endl;
294  }
295 
296  TDirectory* dir = (TDirectory*) obj;
297  TString dir_name = dir->GetName();
298  TIter next(dir->GetListOfKeys());
299  TKey* key;
300  TString key_name;
301 
302  int size_next = number_of_objects_in_dir(next);
303 
304  // We should write to JSON all the objects
305  while ((key = (TKey*) next())) {
306  TObject* next_level_obj;
307  key_name = key->GetName();
308  next_level_obj =
309  dir->GetKey(key_name)->ReadObj(); // Get object. This procedure is better, since it is able to read
310  // names with "/"
311  TString key_type = next_level_obj->ClassName();
312 
313  if (size_next == 1 && key_type == "TObjString") { // If this is a directory just before the leaf (the TObjString
314  // file)
315  j = key_name; // This is the leaf
316  if (key_name == dir_name) {
317  std::cout << "WARNING: The names of Directory and TObjstring inside this directory are the same: " << dir_name
318  << std::endl;
319  }
320  }
321  // We will ignore Hists in "Results" directory, since we have already written them in a higher level
322  else if ((dir_name == "Results") &&
323  (key_type == "TH1I" || key_type == "TH2I" || key_type == "TH1F" || key_type == "TH2F" ||
324  key_type == "TProfile2D" || key_type == "TProfile" || key_type == "TGraphAsymmErrors" ||
325  key_type == "TGraphErrors" || key_type == "TH1D" || key_type == "TH2S")) {
326  continue;
327  }
328  // If inside this directory other subdirrectory
329  else { // Write Directory_names as keys and content of the dirrectories as a values
330  // Convert TString to string
331  std::string key_name_string(key_name.Data());
332  // Write JSON to rootFile
333  j.emplace(key_name_string, to_JSON(next_level_obj));
334  }
335  }
336  return j;
337 }
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
to_JSON
nlohmann::ordered_json to_JSON(TObject *obj)
Definition: HanOutputFile_translation.cxx:281
convert_file
int convert_file(TObject *obj_in, TObject *obj_to)
Definition: HanOutputFile_translation.cxx:134
json
nlohmann::json json
Definition: HistogramDef.cxx:9
mergePhysValFiles.start
start
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:14
LArCellConditions.argv
argv
Definition: LArCellConditions.py:112
work_with_results_dir
int work_with_results_dir(TObject *obj_in, TObject *obj_to)
Definition: HanOutputFile_translation.cxx:185
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
python.handimod.now
now
Definition: handimod.py:675
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
ParseInputs.gDirectory
gDirectory
Definition: Final2012/ParseInputs.py:133
work_with_no_hist_dir
int work_with_no_hist_dir(TObject *obj, TObject *destination_to_save)
Definition: HanOutputFile_translation.cxx:217
main
int main(int argc, char *argv[])
Definition: HanOutputFile_translation.cxx:60
DQHistogramMergeRegExp.argc
argc
Definition: DQHistogramMergeRegExp.py:20
beamspotman.dir
string dir
Definition: beamspotman.py:623
include_hist
int include_hist(TObject *obj)
Instructions: COMPILE g++ -std=c++11 HanOutputFile_translation.cxx -O2 root-config --cflags root-conf...
Definition: HanOutputFile_translation.cxx:241
rename_histos_in_files_from_old_code.input_file_name
string input_file_name
Definition: rename_histos_in_files_from_old_code.py:17
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:195
number_of_objects_in_dir
int number_of_objects_in_dir(TIter next)
Definition: HanOutputFile_translation.cxx:274
python.PyAthena.obj
obj
Definition: PyAthena.py:135
create_input.copy_dir
def copy_dir(source, destination)
Definition: create_input.py:73
mapkey::key
key
Definition: TElectronEfficiencyCorrectionTool.cxx:37