ATLAS Offline Software
Loading...
Searching...
No Matches
ScanDir.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7
8//
9// includes
10//
11
13
23#include <TString.h>
24#include <memory>
25
26//
27// method implementations
28//
29
30namespace SH
31{
// Standard constructor: sets up the default scan configuration.
//
// NOTE(review): the listing numbers embedded in this view jump from 33 to 35
// and from 39 to 42, so the start of the member-initializer list (including
// its leading ':') and the initializers that follow the trailing comma appear
// to be missing here -- restore them from the original file before compiling.
32 ScanDir ::
33 ScanDir ()
// m_maxDepth is a std::size_t, so -1 converts to the largest representable
// value and the depth limit checked in recurse() is effectively disabled.
35 m_minDepth (0), m_maxDepth (-1),
// By default only file names matching the glob "*.root*" are accepted.
36 m_filePattern (RCU::glob_to_regexp ("*.root*")),
// By default every directory name and every sample name is accepted.
37 m_directoryPattern (RCU::glob_to_regexp ("*")),
38 m_samplePattern (RCU::glob_to_regexp ("*")),
// The default postfix pattern is built from the empty glob.
39 m_samplePostfix (RCU::glob_to_regexp ("")),
42 {}
43
44
45
46 ScanDir& ScanDir ::
47 sampleDepth (int val_sampleDepth)
48 {
49 m_relSampleDepth = val_sampleDepth;
50 m_absSampleDepth = val_sampleDepth;
51 return *this;
52 }
53
54
55
// Set the index of the file hierarchy at which the sample name is gathered.
//
// Stores the given value in m_absSampleDepth and returns *this so that calls
// can be chained.
//
// NOTE(review): the listing numbers embedded in this view jump from 58 to 60,
// so one statement of the body may be missing here -- check the original file.
56 ScanDir& ScanDir ::
57 absSampleDepth (int val_absSampleDepth)
58 {
60 m_absSampleDepth = val_absSampleDepth;
61 return *this;
62 }
63
64
65
66 ScanDir& ScanDir ::
67 sampleName (const std::string& val_sampleName)
68 {
69 m_sampleName = val_sampleName;
70 return *this;
71 }
72
73
74
75 ScanDir& ScanDir ::
76 minDepth (std::size_t val_minDepth)
77 {
78 m_minDepth = val_minDepth;
79 return *this;
80 }
81
82
83
84 ScanDir& ScanDir ::
85 maxDepth (std::size_t val_maxDepth)
86 {
87 m_maxDepth = val_maxDepth;
88 return *this;
89 }
90
91
92
93 ScanDir& ScanDir ::
94 filePattern (const std::string& val_filePattern)
95 {
96 m_filePattern = RCU::glob_to_regexp (val_filePattern);
97 return *this;
98 }
99
100
101
102 ScanDir& ScanDir ::
103 fileRegex (const std::string& val_fileRegex)
104 {
105 m_filePattern = val_fileRegex;
106 return *this;
107 }
108
109
110
111 ScanDir& ScanDir ::
112 directoryPattern (const std::string& val_directoryPattern)
113 {
114 m_directoryPattern = RCU::glob_to_regexp (val_directoryPattern);
115 return *this;
116 }
117
118
119
120 ScanDir& ScanDir ::
121 directoryRegex (const std::string& val_directoryRegex)
122 {
123 m_directoryPattern = val_directoryRegex;
124 return *this;
125 }
126
127
128
129 ScanDir& ScanDir ::
130 samplePattern (const std::string& val_samplePattern)
131 {
132 m_samplePattern = RCU::glob_to_regexp (val_samplePattern);
133 return *this;
134 }
135
136
137
138 ScanDir& ScanDir ::
139 samplePostfix (const std::string& val_samplePostfix)
140 {
141 m_samplePostfix = RCU::glob_to_regexp (val_samplePostfix);
142 m_samplePostfixEmpty = val_samplePostfix.empty();
143 return *this;
144 }
145
146
147
148 ScanDir& ScanDir ::
149 sampleRename (const std::string& pattern, const std::string& name)
150 {
151 m_sampleRename.push_back (std::pair<std::regex,std::string> (std::regex (RCU::glob_to_regexp (pattern)), name));
152 return *this;
153 }
154
155
156
157 ScanDir& ScanDir ::
158 extraNameComponent (int val_relSampleDepth)
159 {
160 RCU_REQUIRE (val_relSampleDepth != 0);
161 m_extraNameComponent = val_relSampleDepth;
162 return *this;
163 }
164
165
166
167 const ScanDir& ScanDir ::
168 scan (SampleHandler& sh, const std::string& dir) const
169 {
170 DiskListLocal list (dir);
171 scan (sh, list);
172 return *this;
173 }
174
175
176
177 const ScanDir& ScanDir ::
178 scanEOS (SampleHandler& sh, const std::string& eosDir) const
179 {
180 DiskListEOS list (eosDir);
181 scan (sh, list);
182 return *this;
183 }
184
185
186
187 const ScanDir& ScanDir ::
188 scan (SampleHandler& sh, DiskList& list) const
189 {
190 std::vector<std::string> hierarchy;
191 hierarchy.push_back (list.dirname());
192
193 std::map<std::string,SamplePtr> samples;
194 typedef std::map<std::string,SamplePtr>::iterator samplesIter;
195 recurse (samples, list, hierarchy);
196 for (samplesIter sample = samples.begin(), end = samples.end();
197 sample != end; ++ sample)
198 {
199 if (sample->second.get() != 0)
200 {
201 sh.add (sample->second);
202 }
203 }
204 return *this;
205 }
206
207
208
209 void ScanDir ::
210 recurse (std::map<std::string,SamplePtr>& samples, DiskList& list,
211 const std::vector<std::string>& hierarchy) const
212 {
213 using namespace msgScanDir;
214
215 ANA_MSG_DEBUG ("scanning directory: " << list.dirname());
216 while (list.next())
217 {
218 std::unique_ptr<DiskList> sublist (list.openDir());
219
220 if (sublist.get() != 0)
221 {
222 if (!RCU::match_expr (m_directoryPattern, list.fileName()))
223 {
224 ANA_MSG_DEBUG ("directory does not match pattern, skipping directory " << list.path());
225 } else if (hierarchy.size() > m_maxDepth)
226 {
227 ANA_MSG_DEBUG ("maxDepth exceeded, skipping directory " << list.path());
228 } else
229 {
230 ANA_MSG_DEBUG ("descending into directory " << list.path());
231 std::vector<std::string> subhierarchy = hierarchy;
232 subhierarchy.push_back (list.fileName());
233 recurse (samples, *sublist, subhierarchy);
234 }
235 } else
236 {
237 if (hierarchy.size() > m_minDepth &&
238 RCU::match_expr (m_filePattern, list.fileName()))
239 {
240 ANA_MSG_DEBUG ("adding file " << list.path());
241 std::vector<std::string> subhierarchy = hierarchy;
242 subhierarchy.push_back (list.fileName());
243 addSampleFile (samples, subhierarchy, list.path());
244 } else
245 {
246 ANA_MSG_DEBUG ("skipping file " << list.path());
247 }
248 }
249 }
250 }
251
252
253
// Add the given file to the sample chosen from the hierarchy, creating the
// sample if necessary.
//
// samples:   the samples found so far, indexed by sample name
// hierarchy: the path components leading to (and including) the file
// path:      the path of the file, which is added to the sample
//
// NOTE(review): the listing numbers embedded in this view skip 263, 266-267,
// 271, 290 and 296, so several statements of this function appear to be
// missing here -- check the original file before relying on this text.
254 void ScanDir ::
255 addSampleFile (std::map<std::string,SamplePtr>& samples,
256 const std::vector<std::string>& hierarchy,
257 const std::string& path) const
258 {
259 std::string sampleName;
260
// Branch taken when a fixed sample name was configured via sampleName().
// NOTE(review): the body of this branch (listing line 263) is not visible.
261 if (!m_sampleName.empty())
262 {
264 } else
265 {
// NOTE(review): the statements that fill sampleName (listing lines 266-267)
// are not visible.  An empty sample name means the file is silently dropped.
268 if (sampleName.empty())
269 return;
270
// Postfix stripping: look for the first position from which the rest of the
// name matches the postfix pattern in full and cut the name off there.
// NOTE(review): the line opening this block (listing line 271) is not
// visible; presumably it is a condition -- confirm.
272 {
273 bool done = false;
274 for (std::size_t iter = 0, end = sampleName.size();
275 iter != end && !done; ++ iter)
276 {
277 if (RCU::match_expr (m_samplePostfix, sampleName.substr (iter)))
278 {
// A match at position 0 would strip the whole name, which is an error.
279 if (iter == 0)
280 RCU_THROW_MSG ("sample name matches entire postfix pattern: \"" + sampleName + "\"");
281 sampleName.resize (iter);
282 done = true;
283 }
284 }
285 }
286
// If an extra name component is configured (non-zero), append it to the
// sample name, separated by an underscore.
287 if (m_extraNameComponent != 0)
288 {
289 std::string component = findPathComponent
// NOTE(review): the argument list of this call (listing line 290) is not
// visible.
// An empty component means the file is silently dropped.
291 if (component.empty())
292 return;
293 sampleName += "_" + component;
294 }
295
// NOTE(review): the condition guarding this return (listing line 296) is
// not visible.
297 return;
298
// Renaming: the first rename rule whose pattern matches the sample name
// replaces it; later rules are not looked at.
299 {
300 bool done = false;
301 for (SampleRenameIter iter = m_sampleRename.begin(),
302 end = m_sampleRename.end(); !done && iter != end; ++ iter)
303 {
304 if (RCU::match_expr (iter->first, sampleName))
305 {
306 sampleName = iter->second;
307 done = true;
308 }
309 }
310 }
311 }
312
// Look up the sample by name; create a new SampleLocal on first use.
313 std::map<std::string,SamplePtr>::iterator iter
314 = samples.find (sampleName);
315 if (iter == samples.end())
316 {
317 SamplePtr sample (new SampleLocal (sampleName));
318 samples[sampleName] = sample;
319 iter = samples.find (sampleName);
320 }
// The map entry is expected to hold a SampleLocal; the file is added to it.
321 SampleLocal *sample = dynamic_cast<SampleLocal*>(iter->second.get());
322 RCU_ASSERT (sample != 0);
323 sample->add (path);
324 }
325
326
327
328 std::string ScanDir ::
329 findPathComponent (const std::vector<std::string>& hierarchy,
330 int absSampleDepth,
331 int relSampleDepth) const
332 {
333 std::string sampleName;
334
335 int myindex = absSampleDepth+1;
336 if (relSampleDepth < 0)
337 myindex = relSampleDepth + hierarchy.size();
338 if (std::size_t (myindex) >= hierarchy.size())
339 return sampleName;
340 if (myindex > 0)
341 {
342 sampleName = hierarchy[myindex];
343 } else
344 {
345 sampleName = hierarchy[0];
346 while (sampleName.empty() ||
347 sampleName[sampleName.size()-1] == '/' ||
348 myindex < 0)
349 {
350 while (!sampleName.empty() && sampleName[sampleName.size()-1] == '/')
351 sampleName.pop_back();
352 if (sampleName.empty())
353 return sampleName;
354 if (myindex < 0)
355 {
356 std::string::size_type split = sampleName.rfind ('/');
357 if (split == std::string::npos)
358 {
359 sampleName.clear ();
360 return sampleName;
361 }
362 sampleName.resize (split);
363 ++ myindex;
364 }
365 if (sampleName.empty())
366 return sampleName;
367 }
368 std::string::size_type split = sampleName.rfind ('/');
369 if (split != std::string::npos)
370 sampleName = sampleName.substr (split + 1);
371 }
372 return sampleName;
373 }
374}
#define RCU_ASSERT(x)
Definition Assert.h:222
#define RCU_REQUIRE(x)
Definition Assert.h:208
#define ANA_MSG_DEBUG(xmsg)
Macro printing debug messages.
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:58
a DiskList implementation for EOS
Definition DiskListEOS.h:26
a DiskList implementation for local directories
an interface for listing directory contents, locally or on a file server
Definition DiskList.h:32
A class that manages a list of Sample objects.
A Sample based on a simple file list.
Definition SampleLocal.h:38
A smart pointer class that holds a single Sample object.
Definition SamplePtr.h:35
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
void scan(TDirectory *td=0, int depth=0)
Definition listroot.cxx:440
This module defines a variety of assert style macros.
Definition Assert.cxx:26
bool match_expr(const std::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition PrunDriver.h:15
std::regex m_directoryPattern
the value set by directoryPattern, converted to a regular expression
Definition ScanDir.h:191
ScanDir & sampleName(const std::string &val_sampleName)
a single sample name into which all found files should be placed.
Definition ScanDir.cxx:67
std::string m_sampleName
the value set by sampleName
Definition ScanDir.h:173
std::vector< std::pair< std::regex, std::string > >::const_iterator SampleRenameIter
the list of entries from sampleRename
Definition ScanDir.h:210
bool m_samplePostfixEmpty
whether samplePostfix has been set to the empty string
Definition ScanDir.h:206
std::regex m_filePattern
the value set by filePattern, converted to a regular expression
Definition ScanDir.h:186
std::string findPathComponent(const std::vector< std::string > &hierarchy, int absSampleDepth, int relSampleDepth) const
find the path component at the given depth
Definition ScanDir.cxx:329
int m_relSampleDepth
if this is negative it is the depth at which we take the sample name, counting from the end
Definition ScanDir.h:163
int m_extraNameComponent
the depth set with extraNameComponent, or 0 otherwise
Definition ScanDir.h:215
std::vector< std::pair< std::regex, std::string > > m_sampleRename
Definition ScanDir.h:211
int m_absSampleDepth
if m_relSampleDepth is not negative, it is the depth at which we take the sample name,...
Definition ScanDir.h:169
ScanDir & absSampleDepth(int val_absSampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition ScanDir.cxx:57
std::size_t m_maxDepth
the value set by maxDepth
Definition ScanDir.h:181
std::regex m_samplePattern
the value set by samplePattern, converted to a regular expression
Definition ScanDir.h:196
ScanDir()
standard constructor
Definition ScanDir.cxx:33
void recurse(std::map< std::string, SamplePtr > &samples, DiskList &list, const std::vector< std::string > &hierarchy) const
perform the recursive scanning of the directory tree
Definition ScanDir.cxx:210
std::size_t m_minDepth
the value set by minDepth
Definition ScanDir.h:177
void addSampleFile(std::map< std::string, SamplePtr > &samples, const std::vector< std::string > &hierarchy, const std::string &path) const
add the given file to the sample based on the hierarchy, creating the sample if necessary
Definition ScanDir.cxx:255
std::regex m_samplePostfix
the value set by samplePostfix, converted to a regular expression
Definition ScanDir.h:201