ATLAS Offline Software
Loading...
Searching...
No Matches
DuplicateChecker.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7
8//
9// includes
10//
11
14
16#include <EventLoop/Job.h>
17#include <EventLoop/IWorker.h>
23#include <TChain.h>
24#include <TTree.h>
25#include <fstream>
28
29//
30// method implementations
31//
32
34
35namespace EL
36{
37 void DuplicateChecker ::
38 testInvariant () const
39 {
40 RCU_INVARIANT (this != nullptr);
41 }
42
43
44
45 DuplicateChecker ::
46 DuplicateChecker ()
47 : m_eventInfoName ("EventInfo")
48 {
49 RCU_NEW_INVARIANT (this);
50 }
51
52
53
54 const std::string& DuplicateChecker ::
55 eventInfoName () const
56 {
57 RCU_READ_INVARIANT (this);
58 return m_eventInfoName;
59 }
60
61
62
63 void DuplicateChecker ::
64 setEventInfoName (const std::string& val_eventInfoName)
65 {
67 m_eventInfoName = val_eventInfoName;
68 }
69
70
71
72 const std::string& DuplicateChecker ::
73 outputTreeName () const
74 {
75 RCU_READ_INVARIANT (this);
76 return m_outputTreeName;
77 }
78
79
80
81 void DuplicateChecker ::
82 setOutputTreeName (const std::string& val_outputTreeName)
83 {
85 m_outputTreeName = val_outputTreeName;
86 }
87
88
89
90 void DuplicateChecker ::
91 addKnownDuplicate (const std::string& sampleName,
92 const std::string& fileName,
93 Long64_t entry, number_type runNumber,
94 number_type eventNumber)
95 {
97 m_duplicates[std::make_pair (sampleName, fileName)].insert
98 (std::make_pair (entry, std::make_pair (runNumber, eventNumber)));
99 }
100
101
102
103 void DuplicateChecker ::
104 addKnownDuplicatesFile (const std::string& duplicatesFile)
105 {
106 std::ifstream file (duplicatesFile);
107 std::string line;
108 while (std::getline (file, line))
109 {
110 if (!line.empty() && line[0] != '#')
111 {
112 std::string sampleName, fileName;
113 Long64_t entry;
114 number_type runNumber, eventNumber;
115 std::istringstream str (line);
116 if (!(str >> sampleName >> fileName >> entry >> runNumber >> eventNumber))
117 RCU_THROW_MSG ("failed to parse line: " + line);
118 addKnownDuplicate (sampleName, fileName, entry, runNumber, eventNumber);
119 }
120 }
121 }
122
123
124
125 bool DuplicateChecker ::
126 processSummary (const std::string& submitdir, const std::string& treeName)
127 {
129 sh.load (submitdir + "/hist");
130 sh.setMetaString (SH::MetaFields::treeName, treeName);
131 return processSummary (sh, submitdir + "/duplicates");
132 }
133
134
135
136 bool DuplicateChecker ::
137 processSummary (const SH::SampleHandler& sh, const std::string& outputFile)
138 {
139 std::ofstream file (outputFile.c_str());
140 bool success = true;
141 for (SH::Sample *sample : sh)
142 {
143 std::set<std::pair<number_type,number_type>> known_list;
144 std::set<std::pair<number_type,number_type>> processed_list;
145
146 std::unique_ptr<TChain> tree (sample->makeTChain());
147 std::string *inputFileName = nullptr;
148 tree->SetBranchAddress ("fileName", &inputFileName);
149 Long64_t inputFileIndex;
150 tree->SetBranchAddress ("fileIndex", &inputFileIndex);
151 number_type runNumber;
152 tree->SetBranchAddress ("runNumber", &runNumber);
153 number_type eventNumber;
154 tree->SetBranchAddress ("eventNumber", &eventNumber);
155 Bool_t processed;
156 tree->SetBranchAddress ("processed", &processed);
157 for (Long64_t entry = 0, entries = tree->GetEntries();
158 entry < entries; ++ entry)
159 {
160 if (tree->GetEntry (entry) < 0)
161 RCU_THROW_MSG ("failed to read tree entry");
162 std::pair<number_type,number_type> id (runNumber, eventNumber);
163
164 if (known_list.find (id) == known_list.end())
165 known_list.insert (id);
166 else
167 {
168 file << sample->name() << " " << *inputFileName << " "
169 << inputFileIndex << " " << runNumber << " "
170 << eventNumber << std::endl;
171 }
172
173 if (processed)
174 {
175 if (processed_list.find (id) == processed_list.end())
176 processed_list.insert (id);
177 else
178 {
179 success = false;
180 std::cout << "ERROR: duplicate event processed: "
181 << sample->name() << " "
182 << runNumber << " " << eventNumber << std::endl;
183 }
184 }
185 }
186 if (known_list.size() != processed_list.size())
187 {
188 RCU_ASSERT (processed_list.size() < known_list.size());
189 success = false;
190 for (auto& id : known_list)
191 {
192 if (processed_list.find (id) == processed_list.end())
193 {
194 std::cout << "ERROR: event never processed: "
195 << sample->name() << " "
196 << runNumber << " " << eventNumber << std::endl;
197 }
198 }
199 }
200 }
201 return success;
202 }
203
204
205
206 StatusCode DuplicateChecker ::
207 setupJob (Job& job)
208 {
210 job.useXAOD ();
211 return StatusCode::SUCCESS;
212 }
213
214
215
216 StatusCode DuplicateChecker ::
217 changeInput (bool /*firstFile*/)
218 {
220 m_inputFileName = wk()->inputFileName();
221
222 auto iter = m_duplicates.find
223 (std::make_pair (wk()->metaData()->castString (SH::MetaNames::sampleName()), m_inputFileName));
224 if (iter != m_duplicates.end())
225 m_currentDuplicates = &iter->second;
226 else
227 m_currentDuplicates = nullptr;
228 return StatusCode::SUCCESS;
229 }
230
231
232
233 StatusCode DuplicateChecker ::
234 initialize ()
235 {
237
238 m_event = wk()->xaodEvent();
239
240 if (!m_outputTreeName.empty())
241 {
242 m_outputTree = new TTree (m_outputTreeName.c_str(), "DuplicateChecker tree");
243 wk()->addOutput (m_outputTree);
244 m_outputTree->Branch ("fileName", &m_inputFileName);
245 m_outputTree->Branch ("fileIndex", &m_inputFileIndex, "inputFileIndex/L");
246 m_outputTree->Branch ("runNumber", &m_runNumber, "runNumber/i");
247 m_outputTree->Branch ("eventNumber", &m_eventNumber, "eventNumber/i");
248 m_outputTree->Branch ("processed", &m_processEvent, "processed/O");
249 }
250
251 return StatusCode::SUCCESS;
252 }
253
254
255
256 StatusCode DuplicateChecker ::
257 execute ()
258 {
260
261 read_run_event_number ();
262 m_processEvent = true;
263
264 const std::pair<number_type,number_type> id (m_runNumber, m_eventNumber);
265
266 if (m_currentDuplicates != nullptr)
267 {
268 auto iter = m_currentDuplicates->find (wk()->treeEntry());
269 if (iter != m_currentDuplicates->end())
270 {
271 if (id != iter->second)
272 {
273 ATH_MSG_FATAL ("expected duplicate event at entry " << iter->first
274 << " to be run=" << iter->second.first << ",event="
275 << iter->second.second << " but found run="
276 << id.first << ",event=" << id.second);
277 return StatusCode::FAILURE;
278 }
279 m_processEvent = false;
280 }
281 }
282
283 // implicitly skip duplicates in same file
284 if (m_processEvent)
285 {
286 if (m_processed.find (id) != m_processed.end())
287 {
288 ATH_MSG_WARNING ("skipping duplicate event run=" << id.first
289 << " event=" << id.second << " in file "
290 << wk()->inputFileName() << " at event "
291 << wk()->treeEntry());
292 m_processEvent = false;
293 } else
294 m_processed.insert (id);
295 }
296 if (m_processEvent == false)
297 wk()->skipEvent ();
298 if (m_outputTree)
299 {
300 m_inputFileIndex = wk()->treeEntry ();
301 m_outputTree->Fill ();
302 // m_inputFileName.clear ();
303 }
304
305 return StatusCode::SUCCESS;
306 }
307
308
309
310 void DuplicateChecker ::
311 read_run_event_number ()
312 {
313 const xAOD::EventInfo *info = nullptr;
314 if (m_event->retrieve (info, m_eventInfoName).isFailure())
315 RCU_THROW_MSG ("didn't find event info");
316
317 m_runNumber = info->runNumber();
318 m_eventNumber = info->eventNumber();
319 }
320}
#define RCU_INVARIANT(x)
Definition Assert.h:201
#define RCU_ASSERT(x)
Definition Assert.h:222
#define RCU_CHANGE_INVARIANT(x)
Definition Assert.h:231
#define RCU_NEW_INVARIANT(x)
Definition Assert.h:233
#define RCU_READ_INVARIANT(x)
Definition Assert.h:229
#define ATH_MSG_FATAL(x)
#define ATH_MSG_WARNING(x)
ClassImp(EL::DuplicateChecker) namespace EL
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:58
A class that manages a list of Sample objects.
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition Sample.h:54
double entries
Definition listroot.cxx:49
This module defines the arguments passed from the BATCH driver to the BATCH worker.
EventInfo_v1 EventInfo
Definition of the latest event info version.
static const std::string treeName
the name of the tree in the sample
Definition MetaFields.h:52
static const std::string & sampleName()
the name of the sample being processed
Definition MetaNames.cxx:57
TChain * tree
TFile * file