25#include <TChainElement.h>
38 const std::string& pattern,
39 const std::string& samplePattern,
40 const std::string& samplePostfix)
66 const std::string& prefix)
79 const std::string& pattern)
92 std::unique_ptr<SampleLocal> result (
new SampleLocal (name));
95 TIter chainIter (chain.GetListOfFiles());
96 TChainElement *chainElement = 0;
97 while ((chainElement =
dynamic_cast<TChainElement*
>(chainIter.Next())) != 0)
98 result->add (chainElement->GetTitle());
99 return result.release();
105 DiskList& list,
const std::string& pattern)
118 if (pattern.find (
"*") == std::string::npos)
123 std::set<std::string>
types = {
"DATASET",
"DIDType.DATASET"};
124 if (pattern.back() ==
'/')
125 types = {
"CONTAINER",
"DIDType.CONTAINER"};
128 for (
auto& ds : subresult)
141 if (pattern.find (
"*") == std::string::npos && !alwaysQuery)
148 for (std::string
type : {
"CONTAINER",
"DIDType.CONTAINER",
"DATASET",
"DIDType.DATASET"})
150 for (
auto& ds : subresult)
161 RCU_THROW_MSG (
"failed to find any datasets matching pattern: " + pattern);
172 if (ds[ds.size()-1] ==
'/')
173 name = ds.substr (0, ds.size()-1);
177 auto sample = std::make_unique<SampleGrid> (name);
180 sh.add (std::move (sample));
186 const std::vector<std::string>& dsList)
189 for (
const std::string &ds : dsList)
196 if (ds.at(ds.size() - 1) ==
'/')
197 name.append(ds.substr (0, ds.size() - 1));
202 auto sample = std::make_unique<SampleGrid> (dsName);
205 sh.add (std::move (sample));
208 const std::string& dsFile)
210 std::ifstream
file (dsFile.c_str());
214 const std::set<char> whitespaces{
'\t',
' ',
'\n',
'\r'};
215 while (std::getline (
file, ds))
217 while ((!ds.empty()) && whitespaces.count(ds.back())) ds.pop_back();
218 if (ds.empty() || ds.at(0) ==
'#')
226 if (ds.at(ds.size() - 1) ==
'/')
227 name.append(ds.substr (0, ds.size() - 1));
234 auto sample = std::make_unique<SampleGrid> (dsName);
237 sh.add (std::move (sample));
242 const std::string& from,
const std::string& to,
247 for (
auto sample :
sh.samples())
258 RCU_THROW_MSG (
"no dataset configured for grid dataset " + ds);
262 std::set<std::string> knownFiles;
263 std::map<std::string,std::string> usedFiles;
268 knownFiles.insert (entry.name);
269 if (entry.disk == disk)
271 std::string url = entry.replica;
272 const auto split = url.find (from);
273 if (
split != std::string::npos)
274 url.replace(
split, from.size(), to);
275 usedFiles[entry.name] = url;
280 if (usedFiles.empty())
283 RCU_WARN_MSG (
"dataset " + ds +
" not at " + disk +
", skipped");
284 }
else if (knownFiles.size() != usedFiles.size())
288 RCU_WARN_MSG (
"only incomplete version of dataset " + ds +
" at " + disk);
295 if (usedFiles.size() == 0)
300 auto mysample = std::make_unique<SampleLocal> (grid->
name());
301 *mysample->meta() = *grid->
meta();
303 for (
const auto&
file : usedFiles)
305 mysample->add (
file.second);
307 mysh.
add (std::move (mysample));
317 const std::string& pattern)
319 auto mysample = sample->makeLocal();
320 if (mysample->numFiles() == 0)
325 std::unique_ptr<TFile>
file (TFile::Open (mysample->fileName(0).c_str()));
327 RCU_THROW_MSG (
"could not open file: " + mysample->fileName(0));
329 std::regex mypattern (pattern);
330 for (TIter iter (
file->GetListOfKeys()); (
object = iter.Next()); )
333 dynamic_cast<TTree*
>(
file->Get(object->GetName())))
335 std::string newName = sample->name() +
"_" +
object->GetName();
336 std::unique_ptr<Sample> newSample
337 (
dynamic_cast<Sample*
>(sample->Clone (newName.c_str())));
338 newSample->name (newName);
340 sh.add (std::move (newSample));
351 for (
auto sample :
sh.samples())
361 const std::string&
file)
363 std::ifstream myfile (
file.c_str());
365 auto sample = std::make_unique<SampleLocal> (name);
367 const std::set<char> whitespaces{
'\t',
' ',
'\n',
'\r'};
368 while (std::getline (myfile, line))
370 while ((!line.empty()) && whitespaces.count(line.back())) line.pop_back();
371 if (!line.empty() && line.at(0) !=
'#')
378 sh.add (std::move (sample));
#define RCU_ASSERT_SOFT(x)
static const std::vector< std::string > types
#define RCU_THROW_MSG(message)
#define RCU_WARN_MSG(message)
a DiskList implementation for local directories
an interface for listing directory contents, locally or on a file server
This class implements a Sample located on the grid.
A class that manages a list of Sample objects.
void add(const Sample &sample)
add a copy of the sample to the handler
A Sample based on a simple file list.
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
MetaObject * meta()
the meta-information for this sample
const std::string & name() const
the name of the sample we are using
std::vector< std::string > split(const std::string &s, const std::string &t=":")
bool match_expr(const std::regex &expr, std::string_view str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
std::string glob_to_regexp(std::string_view glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
void scanSingleDir(SampleHandler &sh, const std::string &name, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and turn it into a single sample of the given name guarantee: ...
void addGrid(SampleHandler &sh, const std::string &ds)
effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds....
void scanForTrees(SampleHandler &sh, std::shared_ptr< Sample > &sample, const std::string &pattern)
effects: scan for trees in the given sample (or sample handler), and create a separate sample for eac...
void scanDQ2(SampleHandler &sh, const std::string &pattern)
effects: make a list from DQ2 using the given pattern guarantee: basic, may add partially failures: o...
void readFileList(SampleHandler &sh, const std::string &name, const std::string &file)
effects: read a file list from a text file guarantee: strong failures: out of memory III failures: i/...
Sample * makeFromTChain(const std::string &name, const TChain &chain)
effects: create a sample with the given name from the given TChain object guarantee: strong failures:...
void scanFiles(SampleHandler &sh, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and make a separate sample for each file (using the file name ...
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset
void makeGridDirect(SampleHandler &sh, const std::string &disk, const std::string &from, const std::string &to, bool allow_partial)
effects: update all grid samples in the sample handler that are located on the given disk to be opene...
void scanDir(SampleHandler &sh, DiskList &list, const std::string &pattern, const std::string &samplePattern, const std::string &samplePostfix)
effects: scan the given directory and add all subdirectories as samples that contain root files.
void scanRucio(SampleHandler &sh, const std::string &pattern, bool alwaysQuery)
make a list of grid datasets using the given pattern
void addGridCombined(SampleHandler &sh, const std::string &dsName, const std::vector< std::string > &dsList)
effects: add a combined grid dataset with name dsName for dataset list dsList guarantee: strong failu...
void addGridCombinedFromFile(SampleHandler &sh, const std::string &dsName, const std::string &dsFile)
effects: add a combined grid dataset with name dsName for dataset list file dsFile guarantee: strong ...
the class used for scanning local directories and file servers for samples
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted