ATLAS Offline Software
EmptyFileHandler.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3 */
4 
6 
7 
8 //
9 // includes
10 //
11 
13 
#include <RootCoreUtils/Assert.h>
#include <RootCoreUtils/ShellExec.h>
#include <RootCoreUtils/ThrowMsg.h>
#include <RootCoreUtils/hadd.h>
#include <EventLoop/DirectDriver.h>
#include <EventLoop/Job.h>
#include <EventLoop/OutputStream.h>
#include <TList.h>
#include <TChain.h>
#include <TFile.h>
#include <TObjString.h>
#include <memory>

#include <iostream>
32 
33 //
34 // method implementations
35 //
36 
37 namespace EL
38 {
39  namespace
40  {
41  std::vector<std::string>
42  readVectorFromTree (SH::Sample *sample, const std::string& treeName,
43  const std::string& branchName)
44  {
45  std::vector<std::string> result;
46  auto fileNames = sample->makeFileList();
47  TString *var = nullptr;
48  for (auto& fileName : fileNames)
49  {
50  std::unique_ptr<TFile> file (TFile::Open (fileName.c_str(), "READ"));
51  if (file == nullptr)
52  RCU_THROW_MSG ("failed to open file: " + fileName);
53  TTree *tree = dynamic_cast<TTree*>(file->Get (treeName.c_str()));
54  Long64_t nentries = 0;
55  if (tree != nullptr && (nentries = tree->GetEntries()) > 0)
56  {
57  TBranch *branch = nullptr;
58  tree->SetBranchAddress (branchName.c_str(), &var, &branch);
59  for (Long64_t entry = 0; entry < nentries; ++ entry)
60  {
61  if (branch->GetEntry(entry) <= 0)
62  RCU_THROW_MSG ("failed to read entry from branch");
63  result.push_back (var->Data());
64  }
65  }
66  }
67  return result;
68  }
69 
70 
71 
72  SH::SampleHandler makeReprocessSamples (const std::string& submitdir)
73  {
75  SH::SampleHandler shInput;
76  shInput.load (submitdir + "/input");
77 
79  SH::SampleHandler shHist;
80  shHist.load (submitdir + "/hist");
81  shHist.setMetaString (SH::MetaFields::treeName, "EventLoop_FileExecuted");
82 
84  SH::SampleHandler shReprocess;
85 
86  for (SH::Sample *sampleInput : shInput)
87  {
88  SH::Sample *sampleHist = shHist.get (sampleInput->name());
89  RCU_ASSERT (sampleHist != nullptr);
90  std::vector<std::string> filesUsedVector
91  = readVectorFromTree (sampleHist, "EventLoop_FileExecuted", "file");
92  std::set<std::string> filesUsed
93  (filesUsedVector.begin(), filesUsedVector.end());
94  RCU_ASSERT (filesUsed.size() == filesUsedVector.size());
95 
96  std::unique_ptr<SH::SampleLocal> sampleReprocess;
97  std::vector<std::string> filesInput (sampleInput->makeFileList ());
98 
99  for (auto fileInput : filesInput)
100  {
101  std::string fileName = fileInput;
102  auto split = fileName.rfind ('/');
103  if (split != std::string::npos)
104  fileName = fileName.substr (split + 1);
105 
106  if (filesUsed.find (fileName) == filesUsed.end())
107  {
108  if (sampleReprocess == nullptr)
109  {
110  sampleReprocess.reset (new SH::SampleLocal (sampleInput->name()));
111  *sampleReprocess->meta() = *sampleInput->meta();
112  }
113  sampleReprocess->add (fileInput);
114  }
115  }
116  if (filesInput.empty())
117  {
118  sampleReprocess.reset (new SH::SampleLocal (sampleInput->name()));
119  *sampleReprocess->meta() = *sampleInput->meta();
120  }
121  if (sampleReprocess != nullptr)
122  {
123  RCU_ASSERT (sampleReprocess->makeFileList().size() + filesUsed.size() == filesInput.size());
124  shReprocess.add (sampleReprocess.release());
125  }
126  }
127  return shReprocess;
128  }
129 
130 
131 
132  void mergeHists (const std::string& mainDir,
133  const std::string& extraDir,
134  const SH::SampleHandler& samples)
135  {
136  for (auto& sample : samples)
137  {
138  std::string tmp = extraDir + "/hist2-" + sample->name() + ".root";
139  std::string target = mainDir + "/hist-" + sample->name() + ".root";
140 
141  RCU::hadd (tmp, {target, extraDir + "/hist-" + sample->name() + ".root"});
143  }
144  }
145 
146 
147 
148  void mergeNtuple (const std::string& mainDir,
149  const std::string& extraDir)
150  {
151  SH::SampleHandler mainSH;
152  mainSH.load (mainDir);
153  SH::SampleHandler extraSH;
154  extraSH.load (extraDir);
155  SH::SampleHandler newSH;
156 
157  for (SH::Sample *extraSample : extraSH)
158  {
159  SH::Sample *mainSample = mainSH.get (extraSample->name());
160  RCU_ASSERT (mainSample != nullptr);
161  std::unique_ptr<SH::SampleLocal> newSample
162  (new SH::SampleLocal (extraSample->name()));
163  *newSample->meta() = *mainSample->meta();
164 
165  for (auto& file : mainSample->makeFileList())
166  newSample->add (file);
167  for (auto& file : extraSample->makeFileList())
168  newSample->add (file);
169  newSH.add (newSample.release());
170  }
171 
172  for (SH::Sample *mainSample : mainSH)
173  {
174  if (newSH.get (mainSample->name()) == nullptr)
175  newSH.add (mainSample);
176  }
177 
178  mainSH.save (mainDir);
179  }
180  }
181 
182  void processEmptyFiles (const std::string& submitdir,
183  const Job& job)
184  {
185  SH::SampleHandler shReprocess = makeReprocessSamples (submitdir);
186 
187  if (shReprocess.size() > 0)
188  {
189  std::string mysubmitdir = submitdir + "/emptyFiles";
190  Job myjob = job;
191  myjob.sampleHandler (shReprocess);
192 
194  driver.submit (myjob, mysubmitdir);
195 
196  mergeHists (submitdir, mysubmitdir, shReprocess);
197  for (auto output = job.outputBegin(),
198  end = job.outputEnd(); output != end; ++ output)
199  {
200  mergeNtuple (submitdir + "/output-" + output->label(),
201  mysubmitdir + "/output-" + output->label());
202  }
203  }
204  }
205 }
beamspotnt.var
var
Definition: bin/beamspotnt.py:1394
get_generator_info.result
result
Definition: get_generator_info.py:21
checkxAOD.fileNames
fileNames
Definition: Tools/PyUtils/bin/checkxAOD.py:79
SH::SampleHandler::add
void add(Sample *sample)
add a sample to the handler
EmptyFileHandler.h
Job.h
tree
TChain * tree
Definition: tile_monitor.h:30
OutputStream.h
SampleHandler.h
hadd.h
ShellExec.h
SH::SampleHandler::size
std::size_t size() const
the number of samples contained
Assert.h
FullCPAlgorithmsTest_eljob.driver
driver
Definition: FullCPAlgorithmsTest_eljob.py:171
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
RCU::hadd
void hadd(const std::string &output_file, const std::vector< std::string > &input_files, unsigned max_files)
effects: perform the hadd functionality guarantee: basic failures: out of memory III failures: i/o er...
Definition: hadd.cxx:28
PlotCalibFromCool.nentries
nentries
Definition: PlotCalibFromCool.py:798
SampleLocal.h
FortranAlgorithmOptions.fileName
fileName
Definition: FortranAlgorithmOptions.py:13
MetaObject.h
EL::processEmptyFiles
void processEmptyFiles(const std::string &submitdir, const Job &job)
check the output of the given (completed) job, rerun all (empty) files that did not get a fileExecute...
Definition: EmptyFileHandler.cxx:182
FullCPAlgorithmsTest_eljob.sample
sample
Definition: FullCPAlgorithmsTest_eljob.py:113
SH::Sample::makeFileList
std::vector< std::string > makeFileList() const
make a list of all files, prestaging them if necessary
file
TFile * file
Definition: tile_monitor.h:29
dumpFileToPlots.treeName
string treeName
Definition: dumpFileToPlots.py:20
EL
This module defines the arguments passed from the BATCH driver to the BATCH worker.
Definition: AlgorithmWorkerData.h:24
DeMoUpdate.tmp
string tmp
Definition: DeMoUpdate.py:1167
SH::Sample::meta
MetaObject * meta()
the meta-information for this sample
SH::MetaFields::treeName
static const std::string treeName
the name of the tree in the sample
Definition: MetaFields.h:52
SH::Sample::name
const std::string & name() const
the name of the sample we are using
SH::Sample
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition: Sample.h:54
merge.output
output
Definition: merge.py:17
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
SH::SampleHandler::load
void load(const std::string &directory)
load all the samples from the given directory
EL::DirectDriver
a Driver that runs directly inside the submission job itself
Definition: DirectDriver.h:25
MetaFields.h
ThrowMsg.h
SH::SampleHandler::setMetaString
void setMetaString(const std::string &name, const std::string &value)
set the meta-data string with the given name for all samples.
RTTAlgmain.branch
branch
Definition: RTTAlgmain.py:61
copySelective.target
string target
Definition: copySelective.py:37
RCU::Shell::exec
void exec(const std::string &cmd)
effects: execute the given command guarantee: strong failures: out of memory II failures: system fail...
Definition: ShellExec.cxx:29
SH::SampleLocal
A Sample based on a simple file list.
Definition: SampleLocal.h:38
EL::Job::sampleHandler
const SH::SampleHandler & sampleHandler() const
description: the sample handler used guarantee: no-fail / strong failures: out of memory II
Definition: Job.cxx:192
DirectDriver.h
SH::SampleHandler
A class that manages a list of Sample objects.
Definition: SampleHandler.h:60
RCU_THROW_MSG
#define RCU_THROW_MSG(message)
Definition: PrintMsg.h:58
SH::SampleHandler::get
Sample * get(const std::string &name)
get the sample with the given name
SH::SampleLocal::add
void add(const std::string &file)
add a file to the list
EL::Job
Definition: Job.h:51
test_interactive_athena.job
job
Definition: test_interactive_athena.py:6
RCU_ASSERT
#define RCU_ASSERT(x)
Definition: Assert.h:222
RCU::Shell::quote
std::string quote(const std::string &name)
effects: quote the given name to protect it from the shell returns: the quoted name guarantee: strong...
Definition: ShellExec.cxx:75
Trk::split
@ split
Definition: LayerMaterialProperties.h:38