33#include <TChainElement.h>
46 const std::string& pattern,
47 const std::string& samplePattern,
48 const std::string& samplePostfix)
74 const std::string& prefix)
87 const std::string& pattern)
103 TIter chainIter (chain.GetListOfFiles());
104 TChainElement *chainElement = 0;
105 while ((chainElement =
dynamic_cast<TChainElement*
>(chainIter.Next())) != 0)
106 result->add (chainElement->GetTitle());
113 DiskList& list,
const std::string& pattern)
126 if (pattern.find (
"*") == std::string::npos)
131 std::set<std::string>
types = {
"DATASET",
"DIDType.DATASET"};
132 if (pattern.back() ==
'/')
133 types = {
"CONTAINER",
"DIDType.CONTAINER"};
136 for (
auto& ds : subresult)
149 if (pattern.find (
"*") == std::string::npos && !alwaysQuery)
156 for (std::string
type : {
"CONTAINER",
"DIDType.CONTAINER",
"DATASET",
"DIDType.DATASET"})
158 for (
auto& ds : subresult)
169 RCU_THROW_MSG (
"failed to find any datasets matching pattern: " + pattern);
180 if (ds[ds.size()-1] ==
'/')
181 name = ds.substr (0, ds.size()-1);
185 auto sample = std::make_unique<SampleGrid> (name);
188 sh.add (sample.release());
194 const std::vector<std::string>& dsList)
197 for (
const std::string &ds : dsList)
204 if (ds.at(ds.size() - 1) ==
'/')
205 name.append(ds.substr (0, ds.size() - 1));
210 auto sample = std::make_unique<SampleGrid> (dsName);
213 sh.add (sample.release());
216 const std::string& dsFile)
218 std::ifstream
file (dsFile.c_str());
222 const std::set<char> whitespaces{
'\t',
' ',
'\n',
'\r'};
223 while (std::getline (
file, ds))
225 while ((!ds.empty()) && whitespaces.count(ds.back())) ds.pop_back();
226 if (ds.empty() || ds.at(0) ==
'#')
234 if (ds.at(ds.size() - 1) ==
'/')
235 name.append(ds.substr (0, ds.size() - 1));
242 auto sample = std::make_unique<SampleGrid> (dsName);
245 sh.add (sample.release());
250 const std::string& from,
const std::string& to,
256 end =
sh.end(); sample != end; ++ sample)
267 RCU_THROW_MSG (
"no dataset configured for grid dataset " + ds);
271 std::set<std::string> knownFiles;
272 std::map<std::string,std::string> usedFiles;
277 knownFiles.insert (entry.name);
278 if (entry.disk == disk)
280 std::string url = entry.replica;
281 const auto split = url.find (from);
282 if (
split != std::string::npos)
283 url.replace(
split, from.size(), to);
284 usedFiles[entry.name] = url;
289 if (usedFiles.empty())
292 RCU_WARN_MSG (
"dataset " + ds +
" not at " + disk +
", skipped");
293 }
else if (knownFiles.size() != usedFiles.size())
297 RCU_WARN_MSG (
"only incomplete version of dataset " + ds +
" at " + disk);
304 if (usedFiles.size() == 0)
309 std::unique_ptr<SampleLocal> mysample
311 *mysample->meta() = *grid->
meta();
313 for (
const auto&
file : usedFiles)
315 mysample->add (
file.second);
317 mysh.
add (mysample.release());
327 const std::string& pattern)
335 std::unique_ptr<TFile>
file (TFile::Open (mysample->
fileName(0).c_str()));
339 std::regex mypattern (pattern);
340 for (TIter iter (
file->GetListOfKeys()); (
object = iter.Next()); )
343 dynamic_cast<TTree*
>(
file->Get(
object->GetName())))
345 std::string newName = sample.name() +
"_" +
object->GetName();
346 std::unique_ptr<Sample> newSample
347 (
dynamic_cast<Sample*
>(sample.Clone (newName.c_str())));
348 newSample->name (newName);
350 sh.add (newSample.release());
362 end =
sh.end(); sample != end; ++ sample)
372 const std::string&
file)
374 std::ifstream myfile (
file.c_str());
376 auto sample = std::make_unique<SampleLocal> (name);
378 const std::set<char> whitespaces{
'\t',
' ',
'\n',
'\r'};
379 while (std::getline (myfile, line))
381 while ((!line.empty()) && whitespaces.count(line.back())) line.pop_back();
382 if (!line.empty() && line.at(0) !=
'#')
389 sh.add (sample.release());
#define RCU_ASSERT_SOFT(x)
static const std::vector< std::string > types
#define RCU_THROW_MSG(message)
#define RCU_WARN_MSG(message)
a DiskList implementation for local directories
an interface for listing directory contents, locally or on a file server
This class implements a Sample located on the grid.
A class that manages a list of Sample objects.
std::vector< Sample * >::const_iterator iterator
the iterator to use
void add(Sample *sample)
add a sample to the handler
A Sample based on a simple file list.
A smart pointer class that holds a single Sample object.
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
MetaObject * meta()
the meta-information for this sample
std::size_t numFiles() const
the number of files in the sample
std::string fileName(std::size_t index) const
the name of the file with the given index
const std::string & name() const
the name of the sample we are using
std::vector< std::string > split(const std::string &s, const std::string &t=":")
bool match_expr(const std::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression; guarantee: strong; failure...
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression; guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
void scanSingleDir(SampleHandler &sh, const std::string &name, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and turn it into a single sample of the given name; guarantee: ...
void addGrid(SampleHandler &sh, const std::string &ds)
effects: add a grid dataset for dataset ds; guarantee: strong; failures: out of memory II; requires: ds....
void scanDQ2(SampleHandler &sh, const std::string &pattern)
effects: make a list from DQ2 using the given pattern; guarantee: basic, may add partially; failures: o...
void readFileList(SampleHandler &sh, const std::string &name, const std::string &file)
effects: read a file list from a text file; guarantee: strong; failures: out of memory III; failures: i/...
Sample * makeFromTChain(const std::string &name, const TChain &chain)
effects: create a sample with the given name from the given TChain object; guarantee: strong; failures:...
void scanFiles(SampleHandler &sh, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and make a separate sample for each file (using the file name ...
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset
void makeGridDirect(SampleHandler &sh, const std::string &disk, const std::string &from, const std::string &to, bool allow_partial)
effects: update all grid samples in the sample handler that are located on the given disk to be opene...
void scanDir(SampleHandler &sh, DiskList &list, const std::string &pattern, const std::string &samplePattern, const std::string &samplePostfix)
effects: scan the given directory and add all subdirectories as samples that contain root files.
void scanRucio(SampleHandler &sh, const std::string &pattern, bool alwaysQuery)
make a list of grid datasets using the given pattern
void addGridCombined(SampleHandler &sh, const std::string &dsName, const std::vector< std::string > &dsList)
effects: add a combined grid dataset with name dsName for dataset list dsList; guarantee: strong; failu...
void scanForTrees(SampleHandler &sh, Sample &sample, const std::string &pattern)
effects: scan for trees in the given sample (or sample handler), and create a separate sample for eac...
void addGridCombinedFromFile(SampleHandler &sh, const std::string &dsName, const std::string &dsFile)
effects: add a combined grid dataset with name dsName for dataset list file dsFile; guarantee: strong ...
the class used for scanning local directories and file servers for samples
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted