ATLAS Offline Software
Loading...
Searching...
No Matches
ToolsDiscovery.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3*/
4
5//
6// Distributed under the Boost Software License, Version 1.0.
7// (See accompanying file LICENSE_1_0.txt or copy at
8// http://www.boost.org/LICENSE_1_0.txt)
9
10// Please feel free to contact me (krumnack@iastate.edu) for bug
11// reports, feature suggestions, praise and complaints.
12
13
14//
15// includes
16//
17
19
32#include <TChain.h>
33#include <TChainElement.h>
34#include <TFile.h>
35#include <TTree.h>
36#include <fstream>
37#include <memory>
38
39//
40// method implementations
41//
42
43namespace SH
44{
46 const std::string& pattern,
47 const std::string& samplePattern,
48 const std::string& samplePostfix)
49 {
50 ScanDir()
51 .sampleDepth (0)
52 .minDepth (1)
53 .maxDepth (1)
54 .filePattern (pattern)
55 .samplePattern (samplePattern)
56 .samplePostfix (samplePostfix)
57 .scan (sh, list);
58 }
59
60
61
62 void scanDir (SampleHandler& sh, const std::string& dir)
63 {
64 ScanDir()
65 .sampleDepth (0)
66 .minDepth (1)
67 .maxDepth (1)
68 .scan (sh, dir);
69 }
70
71
72
73 void scanDir (SampleHandler& sh, const std::string& dir,
74 const std::string& prefix)
75 {
76 DiskListLocal list (dir, prefix);
77 ScanDir()
78 .sampleDepth (0)
79 .minDepth (1)
80 .maxDepth (1)
81 .scan (sh, list);
82 }
83
84
85
87 const std::string& pattern)
88 {
89 ScanDir()
90 .sampleDepth (-1)
91 .filePattern (pattern)
92 .samplePostfix (".*")
93 .scan (sh, list);
94 }
95
96
97
98 Sample *makeFromTChain (const std::string& name, const TChain& chain)
99 {
100 std::unique_ptr<SampleLocal> result (new SampleLocal (name));
101 result->meta()->setString (MetaFields::treeName, chain.GetName());
102
103 TIter chainIter (chain.GetListOfFiles());
104 TChainElement *chainElement = 0;
105 while ((chainElement = dynamic_cast<TChainElement*>(chainIter.Next())) != 0)
106 result->add (chainElement->GetTitle());
107 return result.release();
108 }
109
110
111
112 void scanSingleDir (SampleHandler& sh, const std::string& name,
113 DiskList& list, const std::string& pattern)
114 {
115 ScanDir()
116 .sampleDepth (0)
117 .filePattern (pattern)
118 .sampleRename ("*", name)
119 .scan (sh, list);
120 }
121
122
123
124 void scanDQ2 (SampleHandler& sh, const std::string& pattern)
125 {
126 if (pattern.find ("*") == std::string::npos)
127 {
128 addGrid (sh, pattern);
129 } else
130 {
131 std::set<std::string> types = {"DATASET", "DIDType.DATASET"};
132 if (pattern.back() == '/')
133 types = {"CONTAINER", "DIDType.CONTAINER"};
134
135 auto subresult = rucioListDids (pattern);
136 for (auto& ds : subresult)
137 {
138 if (types.find (ds.type) != types.end())
139 addGrid (sh, ds.scope + ":" + ds.name);
140 }
141 }
142 }
143
144
145
146 void scanRucio (SampleHandler& sh, const std::string& pattern,
147 bool alwaysQuery)
148 {
149 if (pattern.find ("*") == std::string::npos && !alwaysQuery)
150 {
151 addGrid (sh, pattern);
152 } else
153 {
154 auto subresult = rucioListDids (pattern);
155 bool added = false;
156 for (std::string type : {"CONTAINER", "DIDType.CONTAINER", "DATASET", "DIDType.DATASET"})
157 {
158 for (auto& ds : subresult)
159 {
160 if (ds.type == type)
161 {
162 addGrid (sh, ds.scope + ":" + ds.name);
163 added = true;
164 }
165 }
166 if (added)
167 return;
168 }
169 RCU_THROW_MSG ("failed to find any datasets matching pattern: " + pattern);
170 }
171 }
172
173
174
175 void addGrid (SampleHandler& sh, const std::string& ds)
176 {
177 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
178
179 std::string name;
180 if (ds[ds.size()-1] == '/')
181 name = ds.substr (0, ds.size()-1);
182 else
183 name = ds;
184
185 auto sample = std::make_unique<SampleGrid> (name);
186 sample->meta()->setString (MetaFields::gridName, ds);
187 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
188 sh.add (sample.release());
189 }
190
191
192
193 void addGridCombined (SampleHandler& sh, const std::string& dsName,
194 const std::vector<std::string>& dsList)
195 {
196 std::string name;
197 for (const std::string &ds : dsList)
198 {
199 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
200
201 if (!name.empty())
202 name.append(",");
203
204 if (ds.at(ds.size() - 1) == '/')
205 name.append(ds.substr (0, ds.size() - 1));
206 else
207 name.append(ds);
208 }
209
210 auto sample = std::make_unique<SampleGrid> (dsName);
211 sample->meta()->setString (MetaFields::gridName, name);
212 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
213 sh.add (sample.release());
214 }
215 void addGridCombinedFromFile (SampleHandler& sh, const std::string& dsName,
216 const std::string& dsFile)
217 {
218 std::ifstream file (dsFile.c_str());
219
220 std::string name;
221 std::string ds;
222 const std::set<char> whitespaces{'\t',' ','\n','\r'};
223 while (std::getline (file, ds))
224 {
225 while ((!ds.empty()) && whitespaces.count(ds.back())) ds.pop_back();
226 if (ds.empty() || ds.at(0) == '#')
227 continue;
228
229 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
230
231 if (!name.empty())
232 name.append(",");
233
234 if (ds.at(ds.size() - 1) == '/')
235 name.append(ds.substr (0, ds.size() - 1));
236 else
237 name.append(ds);
238 }
239 if (!file.eof())
240 RCU_THROW_MSG ("failed to read file: " + dsFile);
241
242 auto sample = std::make_unique<SampleGrid> (dsName);
243 sample->meta()->setString (MetaFields::gridName, name);
244 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
245 sh.add (sample.release());
246 }
247
248
249 void makeGridDirect (SampleHandler& sh, const std::string& disk,
250 const std::string& from, const std::string& to,
251 bool allow_partial)
252 {
253 SampleHandler mysh;
254
255 for (SampleHandler::iterator sample = sh.begin(),
256 end = sh.end(); sample != end; ++ sample)
257 {
258 SampleGrid *grid = dynamic_cast<SampleGrid*>(*sample);
259
260 if (grid == 0)
261 {
262 mysh.add (*sample);
263 } else
264 {
265 const std::string ds = grid->meta()->castString (MetaFields::gridName);
266 if (ds.empty())
267 RCU_THROW_MSG ("no dataset configured for grid dataset " + ds);
268
270
271 std::set<std::string> knownFiles;
272 std::map<std::string,std::string> usedFiles;
273 for (auto& entry : rucioListFileReplicas (ds))
274 {
275 if (RCU::match_expr (pattern, entry.name))
276 {
277 knownFiles.insert (entry.name);
278 if (entry.disk == disk)
279 {
280 std::string url = entry.replica;
281 const auto split = url.find (from);
282 if (split != std::string::npos)
283 url.replace(split, from.size(), to);
284 usedFiles[entry.name] = url;
285 }
286 }
287 }
288
289 if (usedFiles.empty())
290 {
291 if (allow_partial)
292 RCU_WARN_MSG ("dataset " + ds + " not at " + disk + ", skipped");
293 } else if (knownFiles.size() != usedFiles.size())
294 {
295 if (allow_partial)
296 {
297 RCU_WARN_MSG ("only incomplete version of dataset " + ds + " at " + disk);
298 } else
299 {
300 usedFiles.clear ();
301 }
302 }
303
304 if (usedFiles.size() == 0)
305 {
306 sh.add (*sample);
307 } else
308 {
309 std::unique_ptr<SampleLocal> mysample
310 (new SampleLocal (grid->name()));
311 *mysample->meta() = *grid->meta();
312
313 for (const auto& file : usedFiles)
314 {
315 mysample->add (file.second);
316 }
317 mysh.add (mysample.release());
318 }
319 }
320 }
321 swap (sh, mysh);
322 }
323
324
325
327 const std::string& pattern)
328 {
329 SamplePtr mysample = sample.makeLocal();
330 if (mysample->numFiles() == 0)
331 {
332 sh.add (&sample);
333 return;
334 }
335 std::unique_ptr<TFile> file (TFile::Open (mysample->fileName(0).c_str()));
336 if (!file.get())
337 RCU_THROW_MSG ("could not open file: " + mysample->fileName(0));
338 TObject *object = 0;
339 std::regex mypattern (pattern);
340 for (TIter iter (file->GetListOfKeys()); (object = iter.Next()); )
341 {
342 if (RCU::match_expr (mypattern, object->GetName()) &&
343 dynamic_cast<TTree*>(file->Get(object->GetName())))
344 {
345 std::string newName = sample.name() + "_" + object->GetName();
346 std::unique_ptr<Sample> newSample
347 (dynamic_cast<Sample*>(sample.Clone (newName.c_str())));
348 newSample->name (newName);
349 newSample->meta()->setString (MetaFields::treeName, object->GetName());
350 sh.add (newSample.release());
351 }
352 }
353 }
354
355
356
357 void scanForTrees (SampleHandler& sh, const std::string& pattern)
358 {
359 SH::SampleHandler sh_new;
360
361 for (SampleHandler::iterator sample = sh.begin(),
362 end = sh.end(); sample != end; ++ sample)
363 {
364 scanForTrees (sh_new, **sample, pattern);
365 }
366 swap (sh, sh_new);
367 }
368
369
370
371 void readFileList (SampleHandler& sh, const std::string& name,
372 const std::string& file)
373 {
374 std::ifstream myfile (file.c_str());
375
376 auto sample = std::make_unique<SampleLocal> (name);
377 std::string line;
378 const std::set<char> whitespaces{'\t',' ','\n','\r'};
379 while (std::getline (myfile, line))
380 {
381 while ((!line.empty()) && whitespaces.count(line.back())) line.pop_back();
382 if (!line.empty() && line.at(0) != '#')
383 {
384 sample->add (line);
385 }
386 }
387 if (!myfile.eof())
388 RCU_THROW_MSG ("failed to read file: " + file);
389 sh.add (sample.release());
390 }
391}
#define RCU_ASSERT_SOFT(x)
Definition Assert.h:167
static const std::vector< std::string > types
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:58
#define RCU_WARN_MSG(message)
Definition PrintMsg.h:52
a DiskList implementation for local directories
an interface for listing directory contents, locally or on a file server
Definition DiskList.h:32
void swap(MetaObject &a, MetaObject &b)
standard swap
std::string castString(const std::string &name, const std::string &def_val="", CastMode mode=CAST_ERROR_THROW) const
the meta-data string with the given name
This class implements a Sample located on the grid.
Definition SampleGrid.h:44
A class that manages a list of Sample objects.
std::vector< Sample * >::const_iterator iterator
the iterator to use
void add(Sample *sample)
add a sample to the handler
A Sample based on a simple file list.
Definition SampleLocal.h:38
A smart pointer class that holds a single Sample object.
Definition SamplePtr.h:35
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition Sample.h:54
MetaObject * meta()
the meta-information for this sample
std::size_t numFiles() const
the number of files in the sample
std::string fileName(std::size_t index) const
the name of the file with the given index
const std::string & name() const
the name of the sample we are using
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
bool match_expr(const std::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition PrunDriver.h:15
void scanSingleDir(SampleHandler &sh, const std::string &name, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and turn it into a single sample of the given name guarantee: ...
void addGrid(SampleHandler &sh, const std::string &ds)
effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds....
void scanDQ2(SampleHandler &sh, const std::string &pattern)
effects: make a list from DQ2 using the given pattern guarantee: basic, may add partially failures: o...
void readFileList(SampleHandler &sh, const std::string &name, const std::string &file)
effects: read a file list from a text file guarantee: strong failures: out of memory III failures: i/...
Sample * makeFromTChain(const std::string &name, const TChain &chain)
effects: create a sample with the given name from the given TChain object guarantee: strong failures:...
void scanFiles(SampleHandler &sh, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and make a separate sample for each file (using the file name ...
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset
void makeGridDirect(SampleHandler &sh, const std::string &disk, const std::string &from, const std::string &to, bool allow_partial)
effects: update all grid samples in the sample handler that are located on the given disk to be opene...
void scanDir(SampleHandler &sh, DiskList &list, const std::string &pattern, const std::string &samplePattern, const std::string &samplePostfix)
effects: scan the given directory and add all subdirectories as samples that contain root files.
void scanRucio(SampleHandler &sh, const std::string &pattern, bool alwaysQuery)
make a list of grid datasets using the given pattern
void addGridCombined(SampleHandler &sh, const std::string &dsName, const std::vector< std::string > &dsList)
effects: add a combined grid dataset with name dsName for dataset list dsList guarantee: strong failu...
void scanForTrees(SampleHandler &sh, Sample &sample, const std::string &pattern)
effects: scan for trees in the given sample (or sample handler), and create a separate sample for eac...
void addGridCombinedFromFile(SampleHandler &sh, const std::string &dsName, const std::string &dsFile)
effects: add a combined grid dataset with name dsName for dataset list file dsFile guarantee: strong ...
static const std::string gridFilter
the field containing the file filter for the dataset on the grid
Definition MetaFields.h:38
static const std::string gridName
the field containing the name of the dataset on the grid
Definition MetaFields.h:34
static const std::string treeName
the name of the tree in the sample
Definition MetaFields.h:52
static const std::string gridFilter_default
the default value for gridFilter
Definition MetaFields.h:41
the class used for scanning local directories and file servers for samples
Definition ScanDir.h:38
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
Definition ScanDir.cxx:130
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
Definition ScanDir.cxx:168
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition ScanDir.cxx:47
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
Definition ScanDir.cxx:85
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
Definition ScanDir.cxx:139
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
Definition ScanDir.cxx:76
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
Definition ScanDir.cxx:149
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted
Definition ScanDir.cxx:94
TFile * file