ATLAS Offline Software
Functions
DuplicateChecker.cxx File Reference
#include <EventLoopAlgs/DuplicateChecker.h>
#include <EventLoopAlgs/Global.h>
#include <AsgMessaging/MsgStream.h>
#include <EventLoop/Job.h>
#include <EventLoop/IWorker.h>
#include <RootCoreUtils/Assert.h>
#include <RootCoreUtils/ThrowMsg.h>
#include <SampleHandler/MetaFields.h>
#include <SampleHandler/MetaNames.h>
#include <SampleHandler/Sample.h>
#include <TChain.h>
#include <TTree.h>
#include <fstream>
#include <xAODEventInfo/EventInfo.h>
#include <xAODRootAccess/TEvent.h>

Go to the source code of this file.

Functions

 ClassImp (EL::DuplicateChecker) namespace EL
 

Function Documentation

◆ ClassImp()

ClassImp ( EL::DuplicateChecker  )
Author
Nils Krumnack

Definition at line 33 of file DuplicateChecker.cxx.

36 {
37  void DuplicateChecker ::
38  testInvariant () const
39  {
40  RCU_INVARIANT (this != nullptr);
41  }
42 
43 
44 
45  DuplicateChecker ::
46  DuplicateChecker ()
47  : m_eventInfoName ("EventInfo")
48  {
49  RCU_NEW_INVARIANT (this);
50  }
51 
52 
53 
54  const std::string& DuplicateChecker ::
55  eventInfoName () const
56  {
57  RCU_READ_INVARIANT (this);
58  return m_eventInfoName;
59  }
60 
61 
62 
63  void DuplicateChecker ::
64  setEventInfoName (const std::string& val_eventInfoName)
65  {
66  RCU_CHANGE_INVARIANT (this);
67  m_eventInfoName = val_eventInfoName;
68  }
69 
70 
71 
72  const std::string& DuplicateChecker ::
73  outputTreeName () const
74  {
75  RCU_READ_INVARIANT (this);
76  return m_outputTreeName;
77  }
78 
79 
80 
81  void DuplicateChecker ::
82  setOutputTreeName (const std::string& val_outputTreeName)
83  {
84  RCU_CHANGE_INVARIANT (this);
85  m_outputTreeName = val_outputTreeName;
86  }
87 
88 
89 
90  void DuplicateChecker ::
91  addKnownDuplicate (const std::string& sampleName,
92  const std::string& fileName,
93  Long64_t entry, number_type runNumber,
95  {
96  RCU_CHANGE_INVARIANT (this);
97  m_duplicates[std::make_pair (sampleName, fileName)].insert
98  (std::make_pair (entry, std::make_pair (runNumber, eventNumber)));
99  }
100 
101 
102 
103  void DuplicateChecker ::
104  addKnownDuplicatesFile (const std::string& duplicatesFile)
105  {
106  std::ifstream file (duplicatesFile);
107  std::string line;
108  while (std::getline (file, line))
109  {
110  if (!line.empty() && line[0] != '#')
111  {
112  std::string sampleName, fileName;
113  Long64_t entry;
115  std::istringstream str (line);
116  if (!(str >> sampleName >> fileName >> entry >> runNumber >> eventNumber))
117  RCU_THROW_MSG ("failed to parse line: " + line);
118  addKnownDuplicate (sampleName, fileName, entry, runNumber, eventNumber);
119  }
120  }
121  }
122 
123 
124 
125  bool DuplicateChecker ::
126  processSummary (const std::string& submitdir, const std::string& treeName)
127  {
129  sh.load (submitdir + "/hist");
130  sh.setMetaString (SH::MetaFields::treeName, treeName);
131  return processSummary (sh, submitdir + "/duplicates");
132  }
133 
134 
135 
136  bool DuplicateChecker ::
137  processSummary (const SH::SampleHandler& sh, const std::string& outputFile)
138  {
139  std::ofstream file (outputFile.c_str());
140  bool success = true;
141  for (SH::Sample *sample : sh)
142  {
143  std::set<std::pair<number_type,number_type>> known_list;
144  std::set<std::pair<number_type,number_type>> processed_list;
145 
146  std::unique_ptr<TChain> tree (sample->makeTChain());
147  std::string *inputFileName = nullptr;
148  tree->SetBranchAddress ("fileName", &inputFileName);
149  Long64_t inputFileIndex;
150  tree->SetBranchAddress ("fileIndex", &inputFileIndex);
152  tree->SetBranchAddress ("runNumber", &runNumber);
154  tree->SetBranchAddress ("eventNumber", &eventNumber);
155  Bool_t processed;
156  tree->SetBranchAddress ("processed", &processed);
157  for (Long64_t entry = 0, entries = tree->GetEntries();
158  entry < entries; ++ entry)
159  {
160  if (tree->GetEntry (entry) < 0)
161  RCU_THROW_MSG ("failed to read tree entry");
162  std::pair<number_type,number_type> id (runNumber, eventNumber);
163 
164  if (known_list.find (id) == known_list.end())
165  known_list.insert (id);
166  else
167  {
168  file << sample->name() << " " << *inputFileName << " "
169  << inputFileIndex << " " << runNumber << " "
170  << eventNumber << std::endl;
171  }
172 
173  if (processed)
174  {
175  if (processed_list.find (id) == processed_list.end())
176  processed_list.insert (id);
177  else
178  {
179  success = false;
180  std::cout << "ERROR: duplicate event processed: "
181  << sample->name() << " "
182  << runNumber << " " << eventNumber << std::endl;
183  }
184  }
185  }
186  if (known_list.size() != processed_list.size())
187  {
188  RCU_ASSERT (processed_list.size() < known_list.size());
189  success = false;
190  for (auto& id : known_list)
191  {
192  if (processed_list.find (id) == processed_list.end())
193  {
194  std::cout << "ERROR: event never processed: "
195  << sample->name() << " "
196  << runNumber << " " << eventNumber << std::endl;
197  }
198  }
199  }
200  }
201  return success;
202  }
203 
204 
205 
206  StatusCode DuplicateChecker ::
207  setupJob (Job& job)
208  {
209  RCU_CHANGE_INVARIANT (this);
210  job.useXAOD ();
211  return StatusCode::SUCCESS;
212  }
213 
214 
215 
216  StatusCode DuplicateChecker ::
217  changeInput (bool /*firstFile*/)
218  {
219  RCU_CHANGE_INVARIANT (this);
220  m_inputFileName = wk()->inputFileName();
221 
222  auto iter = m_duplicates.find
223  (std::make_pair (wk()->metaData()->castString (SH::MetaNames::sampleName()), m_inputFileName));
224  if (iter != m_duplicates.end())
225  m_currentDuplicates = &iter->second;
226  else
227  m_currentDuplicates = nullptr;
228  return StatusCode::SUCCESS;
229  }
230 
231 
232 
234  initialize ()
235  {
236  RCU_CHANGE_INVARIANT (this);
237 
238  m_event = wk()->xaodEvent();
239 
240  if (!m_outputTreeName.empty())
241  {
242  m_outputTree = new TTree (m_outputTreeName.c_str(), "DuplicateChecker tree");
243  wk()->addOutput (m_outputTree);
244  m_outputTree->Branch ("fileName", &m_inputFileName);
245  m_outputTree->Branch ("fileIndex", &m_inputFileIndex, "inputFileIndex/L");
246  m_outputTree->Branch ("runNumber", &m_runNumber, "runNumber/i");
247  m_outputTree->Branch ("eventNumber", &m_eventNumber, "eventNumber/i");
248  m_outputTree->Branch ("processed", &m_processEvent, "processed/O");
249  }
250 
251  return StatusCode::SUCCESS;
252  }
253 
254 
255 
257  execute ()
258  {
259  RCU_CHANGE_INVARIANT (this);
260 
261  read_run_event_number ();
262  m_processEvent = true;
263 
264  const std::pair<number_type,number_type> id (m_runNumber, m_eventNumber);
265 
266  if (m_currentDuplicates != nullptr)
267  {
268  auto iter = m_currentDuplicates->find (wk()->treeEntry());
269  if (iter != m_currentDuplicates->end())
270  {
271  if (id != iter->second)
272  {
273  ATH_MSG_FATAL ("expected duplicate event at entry " << iter->first
274  << " to be run=" << iter->second.first << ",event="
275  << iter->second.second << " but found run="
276  << id.first << ",event=" << id.second);
277  return StatusCode::FAILURE;
278  }
279  m_processEvent = false;
280  }
281  }
282 
283  // implicitly skip duplicates in same file
284  if (m_processEvent)
285  {
286  if (m_processed.find (id) != m_processed.end())
287  {
288  ATH_MSG_WARNING ("skipping duplicate event run=" << id.first
289  << " event=" << id.second << " in file "
290  << wk()->inputFileName() << " at event "
291  << wk()->treeEntry());
292  m_processEvent = false;
293  } else
294  m_processed.insert (id);
295  }
296  if (m_processEvent == false)
297  wk()->skipEvent ();
298  if (m_outputTree)
299  {
300  m_inputFileIndex = wk()->treeEntry ();
301  m_outputTree->Fill ();
302  // m_inputFileName.clear ();
303  }
304 
305  return StatusCode::SUCCESS;
306  }
307 
308 
309 
310  void DuplicateChecker ::
311  read_run_event_number ()
312  {
313  const xAOD::EventInfo *info = nullptr;
314  if (m_event->retrieve (info, m_eventInfoName).isFailure())
315  RCU_THROW_MSG ("didn't find event info");
316 
317  m_runNumber = info->runNumber();
318  m_eventNumber = info->eventNumber();
319  }
320 }
grepfile.info
info
Definition: grepfile.py:38
python.SystemOfUnits.second
int second
Definition: SystemOfUnits.py:120
ATH_MSG_FATAL
#define ATH_MSG_FATAL(x)
Definition: AthMsgStreamMacros.h:34
checkFileSG.line
line
Definition: checkFileSG.py:75
initialize
void initialize()
Definition: run_EoverP.cxx:894
tree
TChain * tree
Definition: tile_monitor.h:30
AtlasMcWeight::number_type
unsigned int number_type
Definition: AtlasMcWeight.h:20
SH::MetaNames::sampleName
static const std::string & sampleName()
the name of the sample being processed
Definition: MetaNames.cxx:57
LArG4FSStartPointFilterLegacy.execute
execute
Definition: LArG4FSStartPointFilterLegacy.py:20
compareGeometries.outputFile
string outputFile
Definition: compareGeometries.py:25
FortranAlgorithmOptions.fileName
fileName
Definition: FortranAlgorithmOptions.py:13
FullCPAlgorithmsTest_eljob.sh
sh
Definition: FullCPAlgorithmsTest_eljob.py:111
FullCPAlgorithmsTest_eljob.sample
sample
Definition: FullCPAlgorithmsTest_eljob.py:113
RCU::Shell
Definition: ShellExec.cxx:28
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
file
TFile * file
Definition: tile_monitor.h:29
dumpFileToPlots.treeName
string treeName
Definition: dumpFileToPlots.py:20
xAOD::eventNumber
eventNumber
Definition: EventInfo_v1.cxx:124
SH::MetaFields::treeName
static const std::string treeName
the name of the tree in the sample
Definition: MetaFields.h:52
RCU_INVARIANT
#define RCU_INVARIANT(x)
Definition: Assert.h:201
SH::Sample
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition: Sample.h:54
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
id
SG::auxid_t id
Definition: Control/AthContainers/Root/debug.cxx:227
xAOD::EventInfo_v1
Class describing the basic event information.
Definition: EventInfo_v1.h:43
DeMoAtlasDataLoss.runNumber
string runNumber
Definition: DeMoAtlasDataLoss.py:64
python.CaloScaleNoiseConfig.str
str
Definition: CaloScaleNoiseConfig.py:78
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
DeMoScan.first
bool first
Definition: DeMoScan.py:536
entries
double entries
Definition: listroot.cxx:49
RCU_CHANGE_INVARIANT
#define RCU_CHANGE_INVARIANT(x)
Definition: Assert.h:231
SH::SampleHandler
A class that manages a list of Sample objects.
Definition: SampleHandler.h:60
str
Definition: BTagTrackIpAccessor.cxx:11
RCU_THROW_MSG
#define RCU_THROW_MSG(message)
Definition: PrintMsg.h:58
test_interactive_athena.job
job
Definition: test_interactive_athena.py:6
RCU_ASSERT
#define RCU_ASSERT(x)
Definition: Assert.h:222
RCU_READ_INVARIANT
#define RCU_READ_INVARIANT(x)
Definition: Assert.h:229
RCU_NEW_INVARIANT
#define RCU_NEW_INVARIANT(x)
Definition: Assert.h:233