ATLAS Offline Software
DuplicateChecker.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3 */
4 
6 
7 
8 //
9 // includes
10 //
11 
13 #include <EventLoopAlgs/Global.h>
14 
15 #include <AsgMessaging/MsgStream.h>
16 #include <EventLoop/Job.h>
17 #include <EventLoop/IWorker.h>
18 #include <RootCoreUtils/Assert.h>
19 #include <RootCoreUtils/ThrowMsg.h>
22 #include <SampleHandler/Sample.h>
23 #include <TChain.h>
24 #include <TTree.h>
25 #include <fstream>
27 #include <xAODRootAccess/TEvent.h>
28 
29 //
30 // method implementations
31 //
32 
34 
35 namespace EL
36 {
38  testInvariant () const
39  {
40  RCU_INVARIANT (this != nullptr);
41  }
42 
43 
44 
47  : m_eventInfoName ("EventInfo")
48  {
49  RCU_NEW_INVARIANT (this);
50  }
51 
52 
53 
54  const std::string& DuplicateChecker ::
55  eventInfoName () const
56  {
57  RCU_READ_INVARIANT (this);
58  return m_eventInfoName;
59  }
60 
61 
62 
64  setEventInfoName (const std::string& val_eventInfoName)
65  {
66  RCU_CHANGE_INVARIANT (this);
67  m_eventInfoName = val_eventInfoName;
68  }
69 
70 
71 
72  const std::string& DuplicateChecker ::
73  outputTreeName () const
74  {
75  RCU_READ_INVARIANT (this);
76  return m_outputTreeName;
77  }
78 
79 
80 
82  setOutputTreeName (const std::string& val_outputTreeName)
83  {
84  RCU_CHANGE_INVARIANT (this);
85  m_outputTreeName = val_outputTreeName;
86  }
87 
88 
89 
91  addKnownDuplicate (const std::string& sampleName,
92  const std::string& fileName,
93  Long64_t entry, number_type runNumber,
95  {
96  RCU_CHANGE_INVARIANT (this);
97  m_duplicates[std::make_pair (sampleName, fileName)].insert
98  (std::make_pair (entry, std::make_pair (runNumber, eventNumber)));
99  }
100 
101 
102 
104  addKnownDuplicatesFile (const std::string& duplicatesFile)
105  {
106  std::ifstream file (duplicatesFile);
107  std::string line;
108  while (std::getline (file, line))
109  {
110  if (!line.empty() && line[0] != '#')
111  {
112  std::string sampleName, fileName;
113  Long64_t entry;
115  std::istringstream str (line);
116  if (!(str >> sampleName >> fileName >> entry >> runNumber >> eventNumber))
117  RCU_THROW_MSG ("failed to parse line: " + line);
119  }
120  }
121  }
122 
123 
124 
126  processSummary (const std::string& submitdir, const std::string& treeName)
127  {
129  sh.load (submitdir + "/hist");
130  sh.setMetaString (SH::MetaFields::treeName, treeName);
131  return processSummary (sh, submitdir + "/duplicates");
132  }
133 
134 
135 
137  processSummary (const SH::SampleHandler& sh, const std::string& outputFile)
138  {
139  std::ofstream file (outputFile.c_str());
140  bool success = true;
141  for (SH::Sample *sample : sh)
142  {
143  std::set<std::pair<number_type,number_type>> known_list;
144  std::set<std::pair<number_type,number_type>> processed_list;
145 
146  std::unique_ptr<TChain> tree (sample->makeTChain());
147  std::string *inputFileName = nullptr;
148  tree->SetBranchAddress ("fileName", &inputFileName);
149  Long64_t inputFileIndex;
150  tree->SetBranchAddress ("fileIndex", &inputFileIndex);
152  tree->SetBranchAddress ("runNumber", &runNumber);
154  tree->SetBranchAddress ("eventNumber", &eventNumber);
155  Bool_t processed;
156  tree->SetBranchAddress ("processed", &processed);
157  for (Long64_t entry = 0, entries = tree->GetEntries();
158  entry < entries; ++ entry)
159  {
160  if (tree->GetEntry (entry) < 0)
161  RCU_THROW_MSG ("failed to read tree entry");
162  std::pair<number_type,number_type> id (runNumber, eventNumber);
163 
164  if (known_list.find (id) == known_list.end())
165  known_list.insert (id);
166  else
167  {
168  file << sample->name() << " " << *inputFileName << " "
169  << inputFileIndex << " " << runNumber << " "
170  << eventNumber << std::endl;
171  }
172 
173  if (processed)
174  {
175  if (processed_list.find (id) == processed_list.end())
176  processed_list.insert (id);
177  else
178  {
179  success = false;
180  std::cout << "ERROR: duplicate event processed: "
181  << sample->name() << " "
182  << runNumber << " " << eventNumber << std::endl;
183  }
184  }
185  }
186  if (known_list.size() != processed_list.size())
187  {
188  RCU_ASSERT (processed_list.size() < known_list.size());
189  success = false;
190  for (auto& id : known_list)
191  {
192  if (processed_list.find (id) == processed_list.end())
193  {
194  std::cout << "ERROR: event never processed: "
195  << sample->name() << " "
196  << runNumber << " " << eventNumber << std::endl;
197  }
198  }
199  }
200  }
201  return success;
202  }
203 
204 
205 
207  setupJob (Job& job)
208  {
209  RCU_CHANGE_INVARIANT (this);
210  job.useXAOD ();
211  return StatusCode::SUCCESS;
212  }
213 
214 
215 
217  changeInput (bool /*firstFile*/)
218  {
219  RCU_CHANGE_INVARIANT (this);
221 
222  auto iter = m_duplicates.find
223  (std::make_pair (wk()->metaData()->castString (SH::MetaNames::sampleName()), m_inputFileName));
224  if (iter != m_duplicates.end())
225  m_currentDuplicates = &iter->second;
226  else
227  m_currentDuplicates = nullptr;
228  return StatusCode::SUCCESS;
229  }
230 
231 
232 
234  initialize ()
235  {
236  RCU_CHANGE_INVARIANT (this);
237 
238  m_event = wk()->xaodEvent();
239 
240  if (!m_outputTreeName.empty())
241  {
242  m_outputTree = new TTree (m_outputTreeName.c_str(), "DuplicateChecker tree");
244  m_outputTree->Branch ("fileName", &m_inputFileName);
245  m_outputTree->Branch ("fileIndex", &m_inputFileIndex, "inputFileIndex/L");
246  m_outputTree->Branch ("runNumber", &m_runNumber, "runNumber/i");
247  m_outputTree->Branch ("eventNumber", &m_eventNumber, "eventNumber/i");
248  m_outputTree->Branch ("processed", &m_processEvent, "processed/O");
249  }
250 
251  return StatusCode::SUCCESS;
252  }
253 
254 
255 
257  execute ()
258  {
259  RCU_CHANGE_INVARIANT (this);
260 
262  m_processEvent = true;
263 
264  const std::pair<number_type,number_type> id (m_runNumber, m_eventNumber);
265 
266  if (m_currentDuplicates != nullptr)
267  {
268  auto iter = m_currentDuplicates->find (wk()->treeEntry());
269  if (iter != m_currentDuplicates->end())
270  {
271  if (id != iter->second)
272  {
273  ATH_MSG_FATAL ("expected duplicate event at entry " << iter->first
274  << " to be run=" << iter->second.first << ",event="
275  << iter->second.second << " but found run="
276  << id.first << ",event=" << id.second);
277  return StatusCode::FAILURE;
278  }
279  m_processEvent = false;
280  }
281  }
282 
283  // implicitly skip duplicates in same file
284  if (m_processEvent)
285  {
286  if (m_processed.find (id) != m_processed.end())
287  {
288  ATH_MSG_WARNING ("skipping duplicate event run=" << id.first
289  << " event=" << id.second << " in file "
290  << wk()->inputFileName() << " at event "
291  << wk()->treeEntry());
292  m_processEvent = false;
293  } else
294  m_processed.insert (id);
295  }
296  if (m_processEvent == false)
297  wk()->skipEvent ();
298  if (m_outputTree)
299  {
300  m_inputFileIndex = wk()->treeEntry ();
301  m_outputTree->Fill ();
302  // m_inputFileName.clear ();
303  }
304 
305  return StatusCode::SUCCESS;
306  }
307 
308 
309 
312  {
313  const xAOD::EventInfo *info = nullptr;
314  if (m_event->retrieve (info, m_eventInfoName).isFailure())
315  RCU_THROW_MSG ("didn't find event info");
316 
317  m_runNumber = info->runNumber();
318  m_eventNumber = info->eventNumber();
319  }
320 }
grepfile.info
info
Definition: grepfile.py:38
python.SystemOfUnits.second
int second
Definition: SystemOfUnits.py:120
EL::DuplicateChecker::number_type
uint32_t number_type
the integer type to use for run and event numbers
Definition: DuplicateChecker.h:38
ATH_MSG_FATAL
#define ATH_MSG_FATAL(x)
Definition: AthMsgStreamMacros.h:34
DuplicateChecker.h
EL::DuplicateChecker::setOutputTreeName
void setOutputTreeName(const std::string &val_outputTreeName)
set the value of outputTreeName
checkFileSG.line
line
Definition: checkFileSG.py:75
ClassImp
ClassImp(EL::DuplicateChecker) namespace EL
Definition: DuplicateChecker.cxx:33
EL::DuplicateChecker::eventInfoName
const std::string & eventInfoName() const
the name of the EventInfo structure to use
EL::DuplicateChecker::m_processed
std::set< std::pair< number_type, number_type > > m_processed
the list of run-event numbers already encountered
Definition: DuplicateChecker.h:188
EL::DuplicateChecker::m_inputFileName
std::string m_inputFileName
the name of the input file (connected to m_outputTree, if present)
Definition: DuplicateChecker.h:202
EL::IWorker::skipEvent
virtual void skipEvent()=0
effects: skip the current event, i.e.
EL::IWorker::xaodEvent
virtual xAOD::TEvent * xaodEvent() const =0
description: the xAOD event and store guarantee: strong failures: out of memory I failures: TEventSvc...
EL::DuplicateChecker::changeInput
virtual StatusCode changeInput(bool firstFile) override
effects: do all changes to work with a new input file, e.g.
Job.h
EL::DuplicateChecker::addKnownDuplicatesFile
void addKnownDuplicatesFile(const std::string &duplicatesFile)
add a file with known duplicates
tree
TChain * tree
Definition: tile_monitor.h:30
EL::IHistogramWorker::addOutput
virtual void addOutput(TObject *output_swallow)=0
effects: add an object to the output.
EL::DuplicateChecker::m_eventNumber
number_type m_eventNumber
the event number of the current event (connected to m_outputTree, if present)
Definition: DuplicateChecker.h:217
EL::DuplicateChecker::execute
virtual StatusCode execute() override
effects: process the next event guarantee: basic failures: algorithm dependent
EL::DuplicateChecker::m_inputFileIndex
Long64_t m_inputFileIndex
the index in the input file (connected to m_outputTree, if present)
Definition: DuplicateChecker.h:207
AtlasMcWeight::number_type
unsigned int number_type
Definition: AtlasMcWeight.h:20
Assert.h
MetaNames.h
Global.h
EL::DuplicateChecker::processSummary
static bool processSummary(const std::string &submitdir, const std::string &treeName)
process the summary tree from the given submission
SH::MetaNames::sampleName
static const std::string & sampleName()
the name of the sample being processed
Definition: MetaNames.cxx:57
compareGeometries.outputFile
string outputFile
Definition: compareGeometries.py:25
EL::DuplicateChecker::m_runNumber
number_type m_runNumber
the run number of the current event (connected to m_outputTree, if present)
Definition: DuplicateChecker.h:212
EL::DuplicateChecker::m_processEvent
Bool_t m_processEvent
whether the current event is/should be processed (connected to m_outputTree, if present)
Definition: DuplicateChecker.h:222
FortranAlgorithmOptions.fileName
fileName
Definition: FortranAlgorithmOptions.py:13
FullCPAlgorithmsTest_eljob.sh
sh
Definition: FullCPAlgorithmsTest_eljob.py:111
FullCPAlgorithmsTest_eljob.sample
sample
Definition: FullCPAlgorithmsTest_eljob.py:113
EL::DuplicateChecker::outputTreeName
const std::string & outputTreeName() const
the name of the output tree to create, or the empty string if none is created
EL::IWorker::treeEntry
virtual Long64_t treeEntry() const =0
description: the entry in the tree we are reading guarantee: no-fail
EL::IWorker::inputFileName
virtual std::string inputFileName() const =0
the name of the file we are reading the current tree from, without the path component
RCU::Shell
Definition: ShellExec.cxx:28
EL::DuplicateChecker::m_duplicates
std::map< std::pair< std::string, std::string >, std::map< Long64_t, std::pair< number_type, number_type > > > m_duplicates
the list of known duplicates to skip
Definition: DuplicateChecker.h:178
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
EL::DuplicateChecker::testInvariant
void testInvariant() const
test the invariant of this object
TEvent.h
file
TFile * file
Definition: tile_monitor.h:29
dumpFileToPlots.treeName
string treeName
Definition: dumpFileToPlots.py:20
EL
This module defines the arguments passed from the BATCH driver to the BATCH worker.
Definition: AlgorithmWorkerData.h:24
xAOD::eventNumber
eventNumber
Definition: EventInfo_v1.cxx:124
EL::Algorithm::wk
IWorker * wk() const
description: the worker that is controlling us guarantee: no-fail
SH::MetaFields::treeName
static const std::string treeName
the name of the tree in the sample
Definition: MetaFields.h:52
RCU_INVARIANT
#define RCU_INVARIANT(x)
Definition: Assert.h:201
SH::Sample
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition: Sample.h:54
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
id
SG::auxid_t id
Definition: Control/AthContainers/Root/debug.cxx:227
MetaFields.h
ThrowMsg.h
EL::DuplicateChecker::m_outputTree
TTree * m_outputTree
the output tree, if we are creating one
Definition: DuplicateChecker.h:197
EventInfo.h
xAOD::EventInfo_v1
Class describing the basic event information.
Definition: EventInfo_v1.h:43
EL::DuplicateChecker::m_event
xAOD::TEvent * m_event
the event we are reading from
Definition: DuplicateChecker.h:192
DeMoAtlasDataLoss.runNumber
string runNumber
Definition: DeMoAtlasDataLoss.py:64
python.CaloScaleNoiseConfig.str
str
Definition: CaloScaleNoiseConfig.py:78
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
EL::DuplicateChecker::m_eventInfoName
std::string m_eventInfoName
the value returned by eventInfoName
Definition: DuplicateChecker.h:170
EL::DuplicateChecker::DuplicateChecker
DuplicateChecker()
standard constructor
xAOD::TEvent::retrieve
StatusCode retrieve(const T *&obj, const std::string &key)
Retrieve either an input or an output object from the event.
DeMoScan.first
bool first
Definition: DeMoScan.py:536
entries
double entries
Definition: listroot.cxx:49
EL::DuplicateChecker::setEventInfoName
void setEventInfoName(const std::string &val_eventInfoName)
set the value of eventInfoName
RCU_CHANGE_INVARIANT
#define RCU_CHANGE_INVARIANT(x)
Definition: Assert.h:231
EL::DuplicateChecker
Definition: DuplicateChecker.h:31
SH::SampleHandler
A class that manages a list of Sample objects.
Definition: SampleHandler.h:60
str
Definition: BTagTrackIpAccessor.cxx:11
EL::DuplicateChecker::m_currentDuplicates
std::map< Long64_t, std::pair< number_type, number_type > > * m_currentDuplicates
the list of the duplicates in the current file to skip, or the null pointer if there are none
Definition: DuplicateChecker.h:183
RCU_THROW_MSG
#define RCU_THROW_MSG(message)
Definition: PrintMsg.h:58
EL::DuplicateChecker::read_run_event_number
void read_run_event_number()
get the run and event number for the current event
EL::DuplicateChecker::initialize
virtual StatusCode initialize() override
effects: do everything that needs to be done before running the algorithm, e.g.
EL::DuplicateChecker::addKnownDuplicate
void addKnownDuplicate(const std::string &sampleName, const std::string &fileName, Long64_t entry, number_type runNumber, number_type eventNumber)
add a known duplicate event
test_interactive_athena.job
job
Definition: test_interactive_athena.py:6
IWorker.h
EL::DuplicateChecker::m_outputTreeName
std::string m_outputTreeName
the value returned by outputTreeName
Definition: DuplicateChecker.h:174
RCU_ASSERT
#define RCU_ASSERT(x)
Definition: Assert.h:222
RCU_READ_INVARIANT
#define RCU_READ_INVARIANT(x)
Definition: Assert.h:229
Sample.h
MsgStream.h
EL::DuplicateChecker::setupJob
virtual StatusCode setupJob(Job &job) override
effects: give the algorithm a chance to initialize the job with anything this algorithm needs.
RCU_NEW_INVARIANT
#define RCU_NEW_INVARIANT(x)
Definition: Assert.h:233