ATLAS Offline Software
ScanDir.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
6 
7 
8 //
9 // includes
10 //
11 
12 #include <SampleHandler/ScanDir.h>
13 
14 #include <RootCoreUtils/Assert.h>
16 #include <RootCoreUtils/ThrowMsg.h>
23 #include <TString.h>
24 #include <memory>
25 
26 //
27 // method implementations
28 //
29 
30 namespace SH
31 {
33  ScanDir ()
  // Default configuration:
  //  - relative sample depth -2, absolute sample depth 0
  //  - minimum depth 0; maximum depth -1 (m_maxDepth is a std::size_t,
  //    so -1 becomes the largest representable value and the
  //    "hierarchy.size() > m_maxDepth" test in recurse() is never true)
  //  - files must match the glob "*.root*", directories and sample
  //    names match the glob "*"
  //  - the postfix pattern is built from the empty glob and flagged as
  //    empty
  //  - no extra name component (0)
34  : m_relSampleDepth (-2), m_absSampleDepth (0),
35  m_minDepth (0), m_maxDepth (-1),
36  m_filePattern (RCU::glob_to_regexp ("*.root*")),
37  m_directoryPattern (RCU::glob_to_regexp ("*")),
38  m_samplePattern (RCU::glob_to_regexp ("*")),
39  m_samplePostfix (RCU::glob_to_regexp ("")),
40  m_samplePostfixEmpty (true),
41  m_extraNameComponent (0)
42  {}
43 
44 
45 
47  sampleDepth (int val_sampleDepth)
48  {
  // The same value is recorded as both the relative and the absolute
  // sample depth.  Returns *this.
49  m_relSampleDepth = val_sampleDepth;
50  m_absSampleDepth = val_sampleDepth;
51  return *this;
52  }
53 
54 
55 
57  absSampleDepth (int val_absSampleDepth)
58  {
  // The relative depth is reset to 0, i.e. a non-negative value, and
  // only the absolute depth takes the caller's value.  Returns *this.
59  m_relSampleDepth = 0;
60  m_absSampleDepth = val_absSampleDepth;
61  return *this;
62  }
63 
64 
65 
67  sampleName (const std::string& val_sampleName)
68  {
  // Records the sample name; addSampleFile() branches on whether
  // m_sampleName is empty.  Returns *this.
69  m_sampleName = val_sampleName;
70  return *this;
71  }
72 
73 
74 
76  minDepth (std::size_t val_minDepth)
77  {
  // recurse() only accepts a file when hierarchy.size() is strictly
  // greater than this value.  Returns *this.
78  m_minDepth = val_minDepth;
79  return *this;
80  }
81 
82 
83 
85  maxDepth (std::size_t val_maxDepth)
86  {
  // recurse() does not descend into a sub-directory when
  // hierarchy.size() is strictly greater than this value.  Returns *this.
87  m_maxDepth = val_maxDepth;
88  return *this;
89  }
90 
91 
92 
94  filePattern (const std::string& val_filePattern)
95  {
  // The argument is a glob; it is converted with RCU::glob_to_regexp
  // before being stored as the file pattern.  Returns *this.
96  m_filePattern = RCU::glob_to_regexp (val_filePattern);
97  return *this;
98  }
99 
100 
101 
103  fileRegex (const std::string& val_fileRegex)
104  {
  // The argument is stored as the file pattern as-is, without the glob
  // conversion that filePattern() applies.  Returns *this.
105  m_filePattern = val_fileRegex;
106  return *this;
107  }
108 
109 
110 
112  directoryPattern (const std::string& val_directoryPattern)
113  {
  // The argument is a glob; it is converted with RCU::glob_to_regexp
  // before being stored as the directory pattern.  Returns *this.
114  m_directoryPattern = RCU::glob_to_regexp (val_directoryPattern);
115  return *this;
116  }
117 
118 
119 
121  directoryRegex (const std::string& val_directoryRegex)
122  {
  // The argument is stored as the directory pattern as-is, without the
  // glob conversion that directoryPattern() applies.  Returns *this.
123  m_directoryPattern = val_directoryRegex;
124  return *this;
125  }
126 
127 
128 
130  samplePattern (const std::string& val_samplePattern)
131  {
  // The argument is a glob; it is converted with RCU::glob_to_regexp
  // before being stored as the sample pattern.  Returns *this.
132  m_samplePattern = RCU::glob_to_regexp (val_samplePattern);
133  return *this;
134  }
135 
136 
137 
139  samplePostfix (const std::string& val_samplePostfix)
140  {
  // Stores the glob converted to a regular expression and, separately,
  // whether the caller passed the empty string.  Returns *this.
141  m_samplePostfix = RCU::glob_to_regexp (val_samplePostfix);
142  m_samplePostfixEmpty = val_samplePostfix.empty();
143  return *this;
144  }
145 
146 
147 
149  sampleRename (const std::string& pattern, const std::string& name)
150  {
  // Appends a (regular expression, replacement name) pair; the glob in
  // pattern is converted with RCU::glob_to_regexp first.  Entries are
  // kept in insertion order, and addSampleFile() stops at the first
  // entry that matches.  Returns *this.
151  m_sampleRename.push_back (std::pair<boost::regex,std::string> (boost::regex (RCU::glob_to_regexp (pattern)), name));
152  return *this;
153  }
154 
155 
156 
158  extraNameComponent (int val_relSampleDepth)
159  {
  // 0 is rejected because addSampleFile() treats
  // m_extraNameComponent == 0 as "no extra component requested".
  // Returns *this.
160  RCU_REQUIRE (val_relSampleDepth != 0);
161  m_extraNameComponent = val_relSampleDepth;
162  return *this;
163  }
164 
165 
166 
168  scan (SampleHandler& sh, const std::string& dir) const
169  {
  // NOTE(review): the statement declaring `list` (original line 170) is
  // absent from this listing; presumably it builds a DiskListLocal from
  // dir, by analogy with scanEOS() -- confirm against the real file.
  // The work itself is delegated to the scan overload taking a list.
171  scan (sh, list);
172  return *this;
173  }
174 
175 
176 
178  scanEOS (SampleHandler& sh, const std::string& eosDir) const
179  {
  // Wrap the EOS directory in a DiskListEOS and delegate to the scan
  // overload that takes a list.  Returns *this.
180  DiskListEOS list (eosDir);
181  scan (sh, list);
182  return *this;
183  }
184 
185 
186 
189  {
  // NOTE(review): the signature of this function (original lines
  // 187-188) is absent from this listing; the body uses `sh` and `list`.
  //
  // The hierarchy starts with the directory name reported by the list;
  // recurse() extends a copy of it for every level it descends.
190  std::vector<std::string> hierarchy;
191  hierarchy.push_back (list.dirname());
192 
  // Samples are collected by name first and only handed to the sample
  // handler once the whole tree has been walked.
193  std::map<std::string,SamplePtr> samples;
194  typedef std::map<std::string,SamplePtr>::iterator samplesIter;
195  recurse (samples, list, hierarchy);
196  for (samplesIter sample = samples.begin(), end = samples.end();
197  sample != end; ++ sample)
198  {
  // entries holding a null sample are not added
199  if (sample->second.get() != 0)
200  {
201  sh.add (sample->second);
202  }
203  }
204  return *this;
205  }
206 
207 
208 
210  recurse (std::map<std::string,SamplePtr>& samples, DiskList& list,
211  const std::vector<std::string>& hierarchy) const
212  {
  // Walks every entry of list.  Directories are descended into
  // recursively, files are passed to addSampleFile().  hierarchy holds
  // the path components from the scan root down to list itself.
213  using namespace msgScanDir;
214 
215  ANA_MSG_DEBUG ("scanning directory: " << list.dirname());
216  while (list.next())
217  {
  // a non-null result from openDir() is treated as "entry is a
  // directory"; the unique_ptr releases it at the end of the iteration
218  std::unique_ptr<DiskList> sublist (list.openDir());
219 
220  if (sublist.get() != 0)
221  {
  // directories are filtered by name first, then by depth
222  if (!RCU::match_expr (m_directoryPattern, list.fileName()))
223  {
224  ANA_MSG_DEBUG ("directory does not match pattern, skipping directory " << list.path());
225  } else if (hierarchy.size() > m_maxDepth)
226  {
227  ANA_MSG_DEBUG ("maxDepth exceeded, skipping directory " << list.path());
228  } else
229  {
230  ANA_MSG_DEBUG ("descending into directory " << list.path());
  // the callee gets its own copy, extended by this directory's name
231  std::vector<std::string> subhierarchy = hierarchy;
232  subhierarchy.push_back (list.fileName());
233  recurse (samples, *sublist, subhierarchy);
234  }
235  } else
236  {
  // files need to sit deeper than m_minDepth and match the file pattern
237  if (hierarchy.size() > m_minDepth &&
238  RCU::match_expr (m_filePattern, list.fileName()))
239  {
240  ANA_MSG_DEBUG ("adding file " << list.path());
  // the file name itself becomes the last hierarchy component
241  std::vector<std::string> subhierarchy = hierarchy;
242  subhierarchy.push_back (list.fileName());
243  addSampleFile (samples, subhierarchy, list.path());
244  } else
245  {
246  ANA_MSG_DEBUG ("skipping file " << list.path());
247  }
248  }
249  }
250  }
251 
252 
253 
255  addSampleFile (std::map<std::string,SamplePtr>& samples,
256  const std::vector<std::string>& hierarchy,
257  const std::string& path) const
258  {
  // Determines the sample name for the file at path and adds the file to
  // the sample of that name in samples.
  //
  // NOTE(review): this listing is missing original lines 263, 266-267,
  // 271, 290, 296, 313 and 317, so some statements below are shown
  // without their beginning or end; the comments only describe what is
  // visible here.
259  std::string sampleName;
260 
261  if (!m_sampleName.empty())
262  {
264  } else
265  {
  // an empty name means the file is silently dropped
268  if (sampleName.empty())
269  return;
270 
272  {
  // Try every start position from left to right; the first suffix that
  // the postfix pattern matches in full is cut off the name.
273  bool done = false;
274  for (std::size_t iter = 0, end = sampleName.size();
275  iter != end && !done; ++ iter)
276  {
277  if (RCU::match_expr (m_samplePostfix, sampleName.substr (iter)))
278  {
  // a match at position 0 would leave nothing of the name
279  if (iter == 0)
280  RCU_THROW_MSG ("sample name matches entire postfix pattern: \"" + sampleName + "\"");
281  sampleName.resize (iter);
282  done = true;
283  }
284  }
285  }
286 
287  if (m_extraNameComponent != 0)
288  {
  // the extra component is appended after an underscore; if it comes
  // back empty the file is dropped
289  std::string component = findPathComponent
291  if (component.empty())
292  return;
293  sampleName += "_" + component;
294  }
295 
297  return;
298 
299  {
  // apply the first rename entry whose pattern matches the name
300  bool done = false;
301  for (SampleRenameIter iter = m_sampleRename.begin(),
302  end = m_sampleRename.end(); !done && iter != end; ++ iter)
303  {
304  if (RCU::match_expr (iter->first, sampleName))
305  {
306  sampleName = iter->second;
307  done = true;
308  }
309  }
310  }
311  }
312 
  // look the sample up by name; on a miss an entry is stored under that
  // name and looked up again
314  = samples.find (sampleName);
315  if (iter == samples.end())
316  {
318  samples[sampleName] = sample;
319  iter = samples.find (sampleName);
320  }
  // the stored sample has to be a SampleLocal for the path to be added
321  SampleLocal *sample = dynamic_cast<SampleLocal*>(iter->second.get());
322  RCU_ASSERT (sample != 0);
323  sample->add (path);
324  }
325 
326 
327 
328  std::string ScanDir ::
329  findPathComponent (const std::vector<std::string>& hierarchy,
330  int absSampleDepth,
331  int relSampleDepth) const
332  {
333  std::string sampleName;
334 
335  int myindex = absSampleDepth+1;
336  if (relSampleDepth < 0)
337  myindex = relSampleDepth + hierarchy.size();
338  if (std::size_t (myindex) >= hierarchy.size())
339  return sampleName;
340  if (myindex > 0)
341  {
342  sampleName = hierarchy[myindex];
343  } else
344  {
345  sampleName = hierarchy[0];
346  while (sampleName.empty() ||
347  sampleName[sampleName.size()-1] == '/' ||
348  myindex < 0)
349  {
350  while (!sampleName.empty() && sampleName[sampleName.size()-1] == '/')
351  sampleName.pop_back();
352  if (sampleName.empty())
353  return sampleName;
354  if (myindex < 0)
355  {
356  std::string::size_type split = sampleName.rfind ('/');
357  if (split == std::string::npos)
358  {
359  sampleName.clear ();
360  return sampleName;
361  }
362  sampleName.resize (split);
363  ++ myindex;
364  }
365  if (sampleName.empty())
366  return sampleName;
367  }
368  std::string::size_type split = sampleName.rfind ('/');
369  if (split != std::string::npos)
370  sampleName = sampleName.substr (split + 1);
371  }
372  return sampleName;
373  }
374 }
xAOD::iterator
JetConstituentVector::iterator iterator
Definition: JetConstituentVector.cxx:68
mergePhysValFiles.pattern
pattern
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:26
SH::ScanDir
the class used for scanning local directories and file servers for samples
Definition: ScanDir.h:38
SH::ScanDir::m_samplePattern
boost::regex m_samplePattern
the value set by samplePattern, converted to a regular expression
Definition: ScanDir.h:196
SH::ScanDir::sampleRename
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
Definition: ScanDir.cxx:149
MessageCheck.h
DiskListLocal.h
SH::ScanDir::directoryRegex
ScanDir & directoryRegex(const std::string &val_directoryRegex)
the regular expression for directories to be visited
Definition: ScanDir.cxx:121
SH::ScanDir::SampleRenameIter
std::vector< std::pair< boost::regex, std::string > >::const_iterator SampleRenameIter
the list of entries from sampleRename
Definition: ScanDir.h:210
SH::ScanDir::maxDepth
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
Definition: ScanDir.cxx:85
ScanDir.h
athena.path
path
python interpreter configuration --------------------------------------—
Definition: athena.py:128
SH::ScanDir::m_filePattern
boost::regex m_filePattern
the value set by filePattern, converted to a regular expression
Definition: ScanDir.h:186
DiskListEOS.h
SH::ScanDir::m_samplePostfix
boost::regex m_samplePostfix
the value set by samplePostfix, converted to a regular expression
Definition: ScanDir.h:201
SH::ScanDir::m_minDepth
std::size_t m_minDepth
the value set by minDepth
Definition: ScanDir.h:177
RCU_REQUIRE
#define RCU_REQUIRE(x)
Definition: Assert.h:208
SH::ScanDir::m_samplePostfixEmpty
bool m_samplePostfixEmpty
whether samplePostfix has been set to the empty string
Definition: ScanDir.h:206
SH::ScanDir::m_directoryPattern
boost::regex m_directoryPattern
the value set by directoryPattern, converted to a regular expression
Definition: ScanDir.h:191
SH::ScanDir::findPathComponent
std::string findPathComponent(const std::vector< std::string > &hierarchy, int absSampleDepth, int relSampleDepth) const
find the path component at the given depth
Definition: ScanDir.cxx:329
SampleHandler.h
RCU
This module defines a variety of assert style macros.
Definition: Assert.cxx:26
SH::ScanDir::m_sampleName
std::string m_sampleName
the value set by sampleName
Definition: ScanDir.h:173
Assert.h
StringUtil.h
SH::ScanDir::samplePattern
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
Definition: ScanDir.cxx:130
SH::ScanDir::directoryPattern
ScanDir & directoryPattern(const std::string &val_directoryPattern)
the pattern for directories to be visited
Definition: ScanDir.cxx:112
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
PrepareReferenceFile.regex
regex
Definition: PrepareReferenceFile.py:43
SamplePtr.h
SampleLocal.h
SH::ScanDir::samplePostfix
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
Definition: ScanDir.cxx:139
FullCPAlgorithmsTest_eljob.sample
sample
Definition: FullCPAlgorithmsTest_eljob.py:116
SH::ScanDir::m_relSampleDepth
int m_relSampleDepth
if this is negative it is the depth at which we take the sample name, counting from the end
Definition: ScanDir.h:163
SH::ScanDir::sampleDepth
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition: ScanDir.cxx:47
RCU::Shell
Definition: ShellExec.cxx:28
RCU::match_expr
bool match_expr(const boost::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
Definition: StringUtil.cxx:40
SH::DiskList
an interface for listing directory contents, locally or on a file server
Definition: DiskList.h:32
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
SH::ScanDir::absSampleDepth
ScanDir & absSampleDepth(int val_absSampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition: ScanDir.cxx:57
SH::ScanDir::extraNameComponent
ScanDir & extraNameComponent(int val_relSampleDepth)
attach an extra name component to the sample based on a second component of the path
Definition: ScanDir.cxx:158
SH::DiskListEOS
a DiskList implementation for EOS
Definition: DiskListEOS.h:26
beamspotman.dir
string dir
Definition: beamspotman.py:623
SH::ScanDir::m_sampleRename
std::vector< std::pair< boost::regex, std::string > > m_sampleRename
Definition: ScanDir.h:211
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:221
SH::ScanDir::fileRegex
ScanDir & fileRegex(const std::string &val_fileRegex)
the regular expression for files to be accepted
Definition: ScanDir.cxx:103
ThrowMsg.h
SH::ScanDir::m_absSampleDepth
int m_absSampleDepth
if m_relSampleDepth is not negative, it is the depth at which we take the sample name,...
Definition: ScanDir.h:169
SH::SamplePtr
A smart pointer class that holds a single Sample object.
Definition: SamplePtr.h:35
SH::DiskListLocal
a DiskList implementation for local directories
Definition: DiskListLocal.h:27
SH::ScanDir::scan
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
Definition: ScanDir.cxx:168
SH::ScanDir::m_maxDepth
std::size_t m_maxDepth
the value set by maxDepth
Definition: ScanDir.h:181
SH::SampleLocal
A Sample based on a simple file list.
Definition: SampleLocal.h:38
SH::ScanDir::scanEOS
const ScanDir & scanEOS(SampleHandler &sh, const std::string &eosDir) const
scan the given directory in EOS and put the created samples into the sample handler
Definition: ScanDir.cxx:178
SH::ScanDir::sampleName
ScanDir & sampleName(const std::string &val_sampleName)
a single sample name into which all found files should be placed.
Definition: ScanDir.cxx:67
SH::ScanDir::filePattern
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted
Definition: ScanDir.cxx:94
SH::SampleHandler
A class that manages a list of Sample objects.
Definition: SampleHandler.h:60
SH
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition: PrunDriver.h:15
RCU_THROW_MSG
#define RCU_THROW_MSG(message)
Definition: PrintMsg.h:58
RCU_ASSERT
#define RCU_ASSERT(x)
Definition: Assert.h:222
RCU::glob_to_regexp
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
Definition: StringUtil.cxx:56
SH::ScanDir::m_extraNameComponent
int m_extraNameComponent
the depth set with extraNameComponent, or 0 otherwise
Definition: ScanDir.h:215
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
SH::ScanDir::addSampleFile
void addSampleFile(std::map< std::string, SamplePtr > &samples, const std::vector< std::string > &hierarchy, const std::string &path) const
add the given file to the sample based on the hierarchy, creating the sample if necessary
Definition: ScanDir.cxx:255
SH::ScanDir::ScanDir
ScanDir()
standard constructor
Definition: ScanDir.cxx:33
SH::ScanDir::recurse
void recurse(std::map< std::string, SamplePtr > &samples, DiskList &list, const std::vector< std::string > &hierarchy) const
perform the recursive scanning of the directory tree
Definition: ScanDir.cxx:210
SH::ScanDir::minDepth
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
Definition: ScanDir.cxx:76
ANA_MSG_DEBUG
#define ANA_MSG_DEBUG(xmsg)
Macro printing debug messages.
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:288