ATLAS Offline Software
Loading...
Searching...
No Matches
ScanDir.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7
8//
9// includes
10//
11
13
23#include <TString.h>
24#include <memory>
25
26//
27// method implementations
28//
29
30namespace SH
31{
// Standard constructor: installs the default scan settings.
// NOTE(review): the listing numbers jump 33 -> 35 and 39 -> 42, so the start
// of the member-initializer list and its last entries are not visible in this
// view; confirm the remaining initializers against the real file.
32 ScanDir ::
33 ScanDir ()
// m_maxDepth is a std::size_t, so -1 wraps to the largest value and the
// "hierarchy.size() > m_maxDepth" test in recurse() never rejects a directory.
35 m_minDepth (0), m_maxDepth (-1),
// the default patterns are given as globs and converted to regular expressions
36 m_filePattern (RCU::glob_to_regexp ("*.root*")),
37 m_directoryPattern (RCU::glob_to_regexp ("*")),
38 m_samplePattern (RCU::glob_to_regexp ("*")),
39 m_samplePostfix (RCU::glob_to_regexp ("")),
42 {}
43
44
45
46 ScanDir& ScanDir ::
47 sampleDepth (int val_sampleDepth)
48 {
49 m_relSampleDepth = val_sampleDepth;
50 m_absSampleDepth = val_sampleDepth;
51 return *this;
52 }
53
54
55
// Sets the absolute depth (m_absSampleDepth) at which the sample name is
// taken, and returns this object so that calls can be chained.
56 ScanDir& ScanDir ::
57 absSampleDepth (int val_absSampleDepth)
58 {
// NOTE(review): listing line 59 is absent from this view; a statement may be
// missing here (presumably one resetting m_relSampleDepth) - confirm against
// the real file before relying on this body.
60 m_absSampleDepth = val_absSampleDepth;
61 return *this;
62 }
63
64
65
66 ScanDir& ScanDir ::
67 sampleName (const std::string& val_sampleName)
68 {
69 m_sampleName = val_sampleName;
70 return *this;
71 }
72
73
74
75 ScanDir& ScanDir ::
76 minDepth (std::size_t val_minDepth)
77 {
78 m_minDepth = val_minDepth;
79 return *this;
80 }
81
82
83
84 ScanDir& ScanDir ::
85 maxDepth (std::size_t val_maxDepth)
86 {
87 m_maxDepth = val_maxDepth;
88 return *this;
89 }
90
91
92
93 ScanDir& ScanDir ::
94 filePattern (const std::string& val_filePattern)
95 {
96 m_filePattern = RCU::glob_to_regexp (val_filePattern);
97 return *this;
98 }
99
100
101
102 ScanDir& ScanDir ::
103 fileRegex (const std::string& val_fileRegex)
104 {
105 m_filePattern = val_fileRegex;
106 return *this;
107 }
108
109
110
111 ScanDir& ScanDir ::
112 directoryPattern (const std::string& val_directoryPattern)
113 {
114 m_directoryPattern = RCU::glob_to_regexp (val_directoryPattern);
115 return *this;
116 }
117
118
119
120 ScanDir& ScanDir ::
121 directoryRegex (const std::string& val_directoryRegex)
122 {
123 m_directoryPattern = val_directoryRegex;
124 return *this;
125 }
126
127
128
129 ScanDir& ScanDir ::
130 samplePattern (const std::string& val_samplePattern)
131 {
132 m_samplePattern = RCU::glob_to_regexp (val_samplePattern);
133 return *this;
134 }
135
136
137
138 ScanDir& ScanDir ::
139 samplePostfix (const std::string& val_samplePostfix)
140 {
141 m_samplePostfix = RCU::glob_to_regexp (val_samplePostfix);
142 m_samplePostfixEmpty = val_samplePostfix.empty();
143 return *this;
144 }
145
146
147
148 ScanDir& ScanDir ::
149 sampleRename (const std::string& pattern, const std::string& name)
150 {
151 m_sampleRename.push_back (std::pair<std::regex,std::string> (std::regex (RCU::glob_to_regexp (pattern)), name));
152 return *this;
153 }
154
155
156
157 ScanDir& ScanDir ::
158 extraNameComponent (int val_relSampleDepth)
159 {
160 RCU_REQUIRE (val_relSampleDepth != 0);
161 m_extraNameComponent = val_relSampleDepth;
162 return *this;
163 }
164
165
166
167 const ScanDir& ScanDir ::
168 scan (SampleHandler& sh, const std::string& dir) const
169 {
170 DiskListLocal list (dir);
171 scan (sh, list);
172 return *this;
173 }
174
175
176
177 const ScanDir& ScanDir ::
178 scanEOS (SampleHandler& sh, const std::string& eosDir) const
179 {
180 DiskListEOS list (eosDir);
181 scan (sh, list);
182 return *this;
183 }
184
185
186
187 const ScanDir& ScanDir ::
188 scan (SampleHandler& sh, DiskList& list) const
189 {
190 std::vector<std::string> hierarchy;
191 hierarchy.push_back (list.dirname());
192
193 std::map<std::string,std::shared_ptr<Sample>> samples;
194 recurse (samples, list, hierarchy);
195 for (auto sample = samples.begin(), end = samples.end();
196 sample != end; ++ sample)
197 {
198 if (sample->second != nullptr)
199 {
200 sh.add (sample->second);
201 }
202 }
203 return *this;
204 }
205
206
207
208 void ScanDir ::
209 recurse (std::map<std::string,std::shared_ptr<Sample>>& samples,
210 DiskList& list,
211 const std::vector<std::string>& hierarchy) const
212 {
213 using namespace msgScanDir;
214
215 ANA_MSG_DEBUG ("scanning directory: " << list.dirname());
216 while (list.next())
217 {
218 std::unique_ptr<DiskList> sublist (list.openDir());
219
220 if (sublist.get() != 0)
221 {
222 if (!RCU::match_expr (m_directoryPattern, list.fileName()))
223 {
224 ANA_MSG_DEBUG ("directory does not match pattern, skipping directory " << list.path());
225 } else if (hierarchy.size() > m_maxDepth)
226 {
227 ANA_MSG_DEBUG ("maxDepth exceeded, skipping directory " << list.path());
228 } else
229 {
230 ANA_MSG_DEBUG ("descending into directory " << list.path());
231 std::vector<std::string> subhierarchy = hierarchy;
232 subhierarchy.push_back (list.fileName());
233 recurse (samples, *sublist, subhierarchy);
234 }
235 } else
236 {
237 if (hierarchy.size() > m_minDepth &&
238 RCU::match_expr (m_filePattern, list.fileName()))
239 {
240 ANA_MSG_DEBUG ("adding file " << list.path());
241 std::vector<std::string> subhierarchy = hierarchy;
242 subhierarchy.push_back (list.fileName());
243 addSampleFile (samples, subhierarchy, list.path());
244 } else
245 {
246 ANA_MSG_DEBUG ("skipping file " << list.path());
247 }
248 }
249 }
250 }
251
252
253
// Adds the file at `path` to the sample it belongs to, creating that sample
// in `samples` if it does not exist yet.  The sample name is either the fixed
// m_sampleName or derived from `hierarchy`; several early returns drop the
// file without adding it.
// NOTE(review): this view of the function is incomplete - listing lines 263,
// 266-267, 271, 290 and 296 are absent, so some statements and conditions are
// not visible below.  Confirm against the real file before editing.
254 void ScanDir ::
255 addSampleFile (std::map<std::string,std::shared_ptr<Sample>>& samples,
256 const std::vector<std::string>& hierarchy,
257 const std::string& path) const
258 {
259 std::string sampleName;
260
261 if (!m_sampleName.empty())
262 {
// NOTE(review): listing line 263 is missing; presumably the fixed sample name
// is assigned to sampleName here - confirm.
264 } else
265 {
// NOTE(review): listing lines 266-267 are missing; presumably sampleName is
// derived from the hierarchy here (e.g. via findPathComponent) - confirm.
// without a sample name the file is silently dropped
268 if (sampleName.empty())
269 return;
270
// NOTE(review): listing line 271 is missing; the block below is presumably
// guarded by a condition (m_samplePostfixEmpty looks related) - confirm.
272 {
273 bool done = false;
// iter walks from the front, so the first hit is the longest suffix of the
// sample name that matches the postfix pattern; that suffix is cut off
274 for (std::size_t iter = 0, end = sampleName.size();
275 iter != end && !done; ++ iter)
276 {
277 if (RCU::match_expr (m_samplePostfix, sampleName.substr (iter)))
278 {
// a match at position 0 would leave an empty sample name, so it is an error
279 if (iter == 0)
280 RCU_THROW_MSG ("sample name matches entire postfix pattern: \"" + sampleName + "\"");
281 sampleName.resize (iter);
282 done = true;
283 }
284 }
285 }
286
// optionally append an extra path component, separated by an underscore
287 if (m_extraNameComponent != 0)
288 {
289 std::string component = findPathComponent
// NOTE(review): listing line 290 (the argument list of the call above) is
// missing from this view.
// if the extra component cannot be determined the file is dropped
291 if (component.empty())
292 return;
293 sampleName += "_" + component;
294 }
295
// NOTE(review): listing line 296 is missing; the return below is presumably
// guarded by a condition (m_samplePattern looks related) - as shown it would
// make the rename step unreachable.  Confirm against the real file.
297 return;
298
299 {
// apply the first rename rule whose pattern matches the sample name
300 bool done = false;
301 for (SampleRenameIter iter = m_sampleRename.begin(),
302 end = m_sampleRename.end(); !done && iter != end; ++ iter)
303 {
304 if (RCU::match_expr (iter->first, sampleName))
305 {
306 sampleName = iter->second;
307 done = true;
308 }
309 }
310 }
311 }
312
// look up the sample by name, creating a SampleLocal on first use
313 auto iter = samples.find (sampleName);
314 if (iter == samples.end())
315 {
316 auto sample = std::make_shared<SampleLocal> (sampleName);
317 samples[sampleName] = sample;
318 iter = samples.find (sampleName);
319 }
// the map stores base-class pointers, so cast back before adding the file;
// NOTE(review): if RCU_ASSERT is compiled out, a failed cast would be
// dereferenced on the next line - confirm the entries are always SampleLocal.
320 SampleLocal *sample = dynamic_cast<SampleLocal*>(iter->second.get());
321 RCU_ASSERT (sample != 0);
322 sample->add (path);
323 }
324
325
326
327 std::string ScanDir ::
328 findPathComponent (const std::vector<std::string>& hierarchy,
329 int absSampleDepth,
330 int relSampleDepth) const
331 {
332 std::string sampleName;
333
334 int myindex = absSampleDepth+1;
335 if (relSampleDepth < 0)
336 myindex = relSampleDepth + hierarchy.size();
337 if (std::size_t (myindex) >= hierarchy.size())
338 return sampleName;
339 if (myindex > 0)
340 {
341 sampleName = hierarchy[myindex];
342 } else
343 {
344 sampleName = hierarchy[0];
345 while (sampleName.empty() ||
346 sampleName[sampleName.size()-1] == '/' ||
347 myindex < 0)
348 {
349 while (!sampleName.empty() && sampleName[sampleName.size()-1] == '/')
350 sampleName.pop_back();
351 if (sampleName.empty())
352 return sampleName;
353 if (myindex < 0)
354 {
355 std::string::size_type split = sampleName.rfind ('/');
356 if (split == std::string::npos)
357 {
358 sampleName.clear ();
359 return sampleName;
360 }
361 sampleName.resize (split);
362 ++ myindex;
363 }
364 if (sampleName.empty())
365 return sampleName;
366 }
367 std::string::size_type split = sampleName.rfind ('/');
368 if (split != std::string::npos)
369 sampleName = sampleName.substr (split + 1);
370 }
371 return sampleName;
372 }
373}
#define RCU_ASSERT(x)
Definition Assert.h:217
#define RCU_REQUIRE(x)
Definition Assert.h:203
#define ANA_MSG_DEBUG(xmsg)
Macro printing debug messages.
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:53
a DiskList implementation for EOS
Definition DiskListEOS.h:18
a DiskList implementation for local directories
an interface for listing directory contents, locally or on a file server
Definition DiskList.h:24
A class that manages a list of Sample objects.
A Sample based on a simple file list.
Definition SampleLocal.h:30
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179
void scan(TDirectory *td=0, int depth=0)
Definition listroot.cxx:440
This module defines a variety of assert style macros.
Definition Assert.cxx:23
bool match_expr(const std::regex &expr, std::string_view str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
std::string glob_to_regexp(std::string_view glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition PrunDriver.h:15
std::regex m_directoryPattern
the value set by directoryPattern, converted to a regular expression
Definition ScanDir.h:192
ScanDir & sampleName(const std::string &val_sampleName)
a single sample name into which all found files should be placed.
Definition ScanDir.cxx:67
std::string m_sampleName
the value set by sampleName
Definition ScanDir.h:174
std::vector< std::pair< std::regex, std::string > >::const_iterator SampleRenameIter
the list of entries from sampleRename
Definition ScanDir.h:211
bool m_samplePostfixEmpty
whether samplePostfix has been set to the empty string
Definition ScanDir.h:207
std::regex m_filePattern
the value set by filePattern, converted to a regular expression
Definition ScanDir.h:187
std::string findPathComponent(const std::vector< std::string > &hierarchy, int absSampleDepth, int relSampleDepth) const
find the path component at the given depth
Definition ScanDir.cxx:328
int m_relSampleDepth
if this is negative it is the depth at which we take the sample name, counting from the end
Definition ScanDir.h:164
int m_extraNameComponent
the depth set with extraNameComponent, or 0 otherwise
Definition ScanDir.h:216
std::vector< std::pair< std::regex, std::string > > m_sampleRename
Definition ScanDir.h:212
int m_absSampleDepth
if m_relSampleDepth is not negative, it is the depth at which we take the sample name,...
Definition ScanDir.h:170
ScanDir & absSampleDepth(int val_absSampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition ScanDir.cxx:57
std::size_t m_maxDepth
the value set by maxDepth
Definition ScanDir.h:182
std::regex m_samplePattern
the value set by samplePattern, converted to a regular expression
Definition ScanDir.h:197
ScanDir()
standard constructor
Definition ScanDir.cxx:33
std::size_t m_minDepth
the value set by minDepth
Definition ScanDir.h:178
std::regex m_samplePostfix
the value set by samplePostfix, converted to a regular expression
Definition ScanDir.h:202
void recurse(std::map< std::string, std::shared_ptr< Sample > > &samples, DiskList &list, const std::vector< std::string > &hierarchy) const
perform the recursive scanning of the directory tree
Definition ScanDir.cxx:209
void addSampleFile(std::map< std::string, std::shared_ptr< Sample > > &samples, const std::vector< std::string > &hierarchy, const std::string &path) const
add the given file to the sample based on the hierarchy, creating the sample if necessary
Definition ScanDir.cxx:255