ATLAS Offline Software
Loading...
Searching...
No Matches
ToolsDiscovery.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7//
8// includes
9//
10
12
24#include <TChain.h>
25#include <TChainElement.h>
26#include <TFile.h>
27#include <TTree.h>
28#include <fstream>
29#include <memory>
30
31//
32// method implementations
33//
34
35namespace SH
36{
38 const std::string& pattern,
39 const std::string& samplePattern,
40 const std::string& samplePostfix)
41 {
42 ScanDir()
43 .sampleDepth (0)
44 .minDepth (1)
45 .maxDepth (1)
46 .filePattern (pattern)
47 .samplePattern (samplePattern)
48 .samplePostfix (samplePostfix)
49 .scan (sh, list);
50 }
51
52
53
54 void scanDir (SampleHandler& sh, const std::string& dir)
55 {
56 ScanDir()
57 .sampleDepth (0)
58 .minDepth (1)
59 .maxDepth (1)
60 .scan (sh, dir);
61 }
62
63
64
65 void scanDir (SampleHandler& sh, const std::string& dir,
66 const std::string& prefix)
67 {
68 DiskListLocal list (dir, prefix);
69 ScanDir()
70 .sampleDepth (0)
71 .minDepth (1)
72 .maxDepth (1)
73 .scan (sh, list);
74 }
75
76
77
79 const std::string& pattern)
80 {
81 ScanDir()
82 .sampleDepth (-1)
83 .filePattern (pattern)
84 .samplePostfix (".*")
85 .scan (sh, list);
86 }
87
88
89
90 Sample *makeFromTChain (const std::string& name, const TChain& chain)
91 {
92 std::unique_ptr<SampleLocal> result (new SampleLocal (name));
93 result->meta()->setString (MetaFields::treeName, chain.GetName());
94
95 TIter chainIter (chain.GetListOfFiles());
96 TChainElement *chainElement = 0;
97 while ((chainElement = dynamic_cast<TChainElement*>(chainIter.Next())) != 0)
98 result->add (chainElement->GetTitle());
99 return result.release();
100 }
101
102
103
104 void scanSingleDir (SampleHandler& sh, const std::string& name,
105 DiskList& list, const std::string& pattern)
106 {
107 ScanDir()
108 .sampleDepth (0)
109 .filePattern (pattern)
110 .sampleRename ("*", name)
111 .scan (sh, list);
112 }
113
114
115
116 void scanDQ2 (SampleHandler& sh, const std::string& pattern)
117 {
118 if (pattern.find ("*") == std::string::npos)
119 {
120 addGrid (sh, pattern);
121 } else
122 {
123 std::set<std::string> types = {"DATASET", "DIDType.DATASET"};
124 if (pattern.back() == '/')
125 types = {"CONTAINER", "DIDType.CONTAINER"};
126
127 auto subresult = rucioListDids (pattern);
128 for (auto& ds : subresult)
129 {
130 if (types.find (ds.type) != types.end())
131 addGrid (sh, ds.scope + ":" + ds.name);
132 }
133 }
134 }
135
136
137
138 void scanRucio (SampleHandler& sh, const std::string& pattern,
139 bool alwaysQuery)
140 {
141 if (pattern.find ("*") == std::string::npos && !alwaysQuery)
142 {
143 addGrid (sh, pattern);
144 } else
145 {
146 auto subresult = rucioListDids (pattern);
147 bool added = false;
148 for (std::string type : {"CONTAINER", "DIDType.CONTAINER", "DATASET", "DIDType.DATASET"})
149 {
150 for (auto& ds : subresult)
151 {
152 if (ds.type == type)
153 {
154 addGrid (sh, ds.scope + ":" + ds.name);
155 added = true;
156 }
157 }
158 if (added)
159 return;
160 }
161 RCU_THROW_MSG ("failed to find any datasets matching pattern: " + pattern);
162 }
163 }
164
165
166
167 void addGrid (SampleHandler& sh, const std::string& ds)
168 {
169 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
170
171 std::string name;
172 if (ds[ds.size()-1] == '/')
173 name = ds.substr (0, ds.size()-1);
174 else
175 name = ds;
176
177 auto sample = std::make_unique<SampleGrid> (name);
178 sample->meta()->setString (MetaFields::gridName, ds);
179 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
180 sh.add (std::move (sample));
181 }
182
183
184
185 void addGridCombined (SampleHandler& sh, const std::string& dsName,
186 const std::vector<std::string>& dsList)
187 {
188 std::string name;
189 for (const std::string &ds : dsList)
190 {
191 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
192
193 if (!name.empty())
194 name.append(",");
195
196 if (ds.at(ds.size() - 1) == '/')
197 name.append(ds.substr (0, ds.size() - 1));
198 else
199 name.append(ds);
200 }
201
202 auto sample = std::make_unique<SampleGrid> (dsName);
203 sample->meta()->setString (MetaFields::gridName, name);
204 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
205 sh.add (std::move (sample));
206 }
207 void addGridCombinedFromFile (SampleHandler& sh, const std::string& dsName,
208 const std::string& dsFile)
209 {
210 std::ifstream file (dsFile.c_str());
211
212 std::string name;
213 std::string ds;
214 const std::set<char> whitespaces{'\t',' ','\n','\r'};
215 while (std::getline (file, ds))
216 {
217 while ((!ds.empty()) && whitespaces.count(ds.back())) ds.pop_back();
218 if (ds.empty() || ds.at(0) == '#')
219 continue;
220
221 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
222
223 if (!name.empty())
224 name.append(",");
225
226 if (ds.at(ds.size() - 1) == '/')
227 name.append(ds.substr (0, ds.size() - 1));
228 else
229 name.append(ds);
230 }
231 if (!file.eof())
232 RCU_THROW_MSG ("failed to read file: " + dsFile);
233
234 auto sample = std::make_unique<SampleGrid> (dsName);
235 sample->meta()->setString (MetaFields::gridName, name);
236 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
237 sh.add (std::move (sample));
238 }
239
240
241 void makeGridDirect (SampleHandler& sh, const std::string& disk,
242 const std::string& from, const std::string& to,
243 bool allow_partial)
244 {
245 SampleHandler mysh;
246
247 for (auto sample : sh.samples())
248 {
249 SampleGrid *grid = dynamic_cast<SampleGrid*>(sample.get());
250
251 if (grid == 0)
252 {
253 mysh.add (sample);
254 } else
255 {
256 const std::string ds = grid->meta()->castString (MetaFields::gridName);
257 if (ds.empty())
258 RCU_THROW_MSG ("no dataset configured for grid dataset " + ds);
259
261
262 std::set<std::string> knownFiles;
263 std::map<std::string,std::string> usedFiles;
264 for (auto& entry : rucioListFileReplicas (ds))
265 {
266 if (RCU::match_expr (pattern, entry.name))
267 {
268 knownFiles.insert (entry.name);
269 if (entry.disk == disk)
270 {
271 std::string url = entry.replica;
272 const auto split = url.find (from);
273 if (split != std::string::npos)
274 url.replace(split, from.size(), to);
275 usedFiles[entry.name] = url;
276 }
277 }
278 }
279
280 if (usedFiles.empty())
281 {
282 if (allow_partial)
283 RCU_WARN_MSG ("dataset " + ds + " not at " + disk + ", skipped");
284 } else if (knownFiles.size() != usedFiles.size())
285 {
286 if (allow_partial)
287 {
288 RCU_WARN_MSG ("only incomplete version of dataset " + ds + " at " + disk);
289 } else
290 {
291 usedFiles.clear ();
292 }
293 }
294
295 if (usedFiles.size() == 0)
296 {
297 sh.add (sample);
298 } else
299 {
300 auto mysample = std::make_unique<SampleLocal> (grid->name());
301 *mysample->meta() = *grid->meta();
302
303 for (const auto& file : usedFiles)
304 {
305 mysample->add (file.second);
306 }
307 mysh.add (std::move (mysample));
308 }
309 }
310 }
311 swap (sh, mysh);
312 }
313
314
315
316 void scanForTrees (SampleHandler& sh, std::shared_ptr<Sample>& sample,
317 const std::string& pattern)
318 {
319 auto mysample = sample->makeLocal();
320 if (mysample->numFiles() == 0)
321 {
322 sh.add (sample);
323 return;
324 }
325 std::unique_ptr<TFile> file (TFile::Open (mysample->fileName(0).c_str()));
326 if (!file.get())
327 RCU_THROW_MSG ("could not open file: " + mysample->fileName(0));
328 TObject *object = 0;
329 std::regex mypattern (pattern);
330 for (TIter iter (file->GetListOfKeys()); (object = iter.Next()); )
331 {
332 if (RCU::match_expr (mypattern, object->GetName()) &&
333 dynamic_cast<TTree*>(file->Get(object->GetName())))
334 {
335 std::string newName = sample->name() + "_" + object->GetName();
336 std::unique_ptr<Sample> newSample
337 (dynamic_cast<Sample*>(sample->Clone (newName.c_str())));
338 newSample->name (newName);
339 newSample->meta()->setString (MetaFields::treeName, object->GetName());
340 sh.add (std::move (newSample));
341 }
342 }
343 }
344
345
346
347 void scanForTrees (SampleHandler& sh, const std::string& pattern)
348 {
349 SH::SampleHandler sh_new;
350
351 for (auto sample : sh.samples())
352 {
353 scanForTrees (sh_new, sample, pattern);
354 }
355 swap (sh, sh_new);
356 }
357
358
359
360 void readFileList (SampleHandler& sh, const std::string& name,
361 const std::string& file)
362 {
363 std::ifstream myfile (file.c_str());
364
365 auto sample = std::make_unique<SampleLocal> (name);
366 std::string line;
367 const std::set<char> whitespaces{'\t',' ','\n','\r'};
368 while (std::getline (myfile, line))
369 {
370 while ((!line.empty()) && whitespaces.count(line.back())) line.pop_back();
371 if (!line.empty() && line.at(0) != '#')
372 {
373 sample->add (line);
374 }
375 }
376 if (!myfile.eof())
377 RCU_THROW_MSG ("failed to read file: " + file);
378 sh.add (std::move (sample));
379 }
380}
#define RCU_ASSERT_SOFT(x)
Definition Assert.h:162
static const std::vector< std::string > types
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:53
#define RCU_WARN_MSG(message)
Definition PrintMsg.h:47
a DiskList implementation for local directories
an interface for listing directory contents, locally or on a file server
Definition DiskList.h:24
void swap(MetaObject &a, MetaObject &b)
standard swap
std::string castString(const std::string &name, const std::string &def_val="", CastMode mode=CAST_ERROR_THROW) const
the meta-data string with the given name
This class implements a Sample located on the grid.
Definition SampleGrid.h:36
A class that manages a list of Sample objects.
void add(const Sample &sample)
add a copy of the sample to the handler
A Sample based on a simple file list.
Definition SampleLocal.h:30
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition Sample.h:49
MetaObject * meta()
the meta-information for this sample
const std::string & name() const
the name of the sample we are using
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:179
bool match_expr(const std::regex &expr, std::string_view str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
std::string glob_to_regexp(std::string_view glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition PrunDriver.h:15
void scanSingleDir(SampleHandler &sh, const std::string &name, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and turn it into a single sample of the given name guarantee: ...
void addGrid(SampleHandler &sh, const std::string &ds)
effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds....
void scanForTrees(SampleHandler &sh, std::shared_ptr< Sample > &sample, const std::string &pattern)
effects: scan for trees in the given sample (or sample handler), and create a separate sample for eac...
void scanDQ2(SampleHandler &sh, const std::string &pattern)
effects: make a list from DQ2 using the given pattern guarantee: basic, may add partially failures: o...
void readFileList(SampleHandler &sh, const std::string &name, const std::string &file)
effects: read a file list from a text file guarantee: strong failures: out of memory III failures: i/...
Sample * makeFromTChain(const std::string &name, const TChain &chain)
effects: create a sample with the given name from the given TChain object guarantee: strong failures:...
void scanFiles(SampleHandler &sh, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and make a separate sample for each file (using the file name ...
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset
void makeGridDirect(SampleHandler &sh, const std::string &disk, const std::string &from, const std::string &to, bool allow_partial)
effects: update all grid samples in the sample handler that are located on the given disk to be opene...
void scanDir(SampleHandler &sh, DiskList &list, const std::string &pattern, const std::string &samplePattern, const std::string &samplePostfix)
effects: scan the given directory and add all subdirectories as samples that contain root files.
void scanRucio(SampleHandler &sh, const std::string &pattern, bool alwaysQuery)
make a list of grid datasets using the given pattern
void addGridCombined(SampleHandler &sh, const std::string &dsName, const std::vector< std::string > &dsList)
effects: add a combined grid dataset with name dsName for dataset list dsList guarantee: strong failu...
void addGridCombinedFromFile(SampleHandler &sh, const std::string &dsName, const std::string &dsFile)
effects: add a combined grid dataset with name dsName for dataset list file dsFile guarantee: strong ...
static const std::string gridFilter
the field containing the file filter for the dataset on the grid
Definition MetaFields.h:30
static const std::string gridName
the field containing the name of the dataset on the grid
Definition MetaFields.h:26
static const std::string treeName
the name of the tree in the sample
Definition MetaFields.h:44
static const std::string gridFilter_default
the default value for gridFilter
Definition MetaFields.h:33
the class used for scanning local directories and file servers for samples
Definition ScanDir.h:39
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
Definition ScanDir.cxx:130
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
Definition ScanDir.cxx:168
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition ScanDir.cxx:47
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
Definition ScanDir.cxx:85
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
Definition ScanDir.cxx:139
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
Definition ScanDir.cxx:76
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
Definition ScanDir.cxx:149
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted
Definition ScanDir.cxx:94
TFile * file