ATLAS Offline Software
ToolsDiscovery.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt or copy at
8 // http://www.boost.org/LICENSE_1_0.txt)
9 
10 // Please feel free to contact me (krumnack@iastate.edu) for bug
11 // reports, feature suggestions, praise and complaints.
12 
13 
14 //
15 // includes
16 //
17 
19 
20 #include <RootCoreUtils/Assert.h>
22 #include <RootCoreUtils/ThrowMsg.h>
31 #include <SampleHandler/ScanDir.h>
32 #include <TChain.h>
33 #include <TChainElement.h>
34 #include <TFile.h>
35 #include <TTree.h>
36 #include <fstream>
37 #include <memory>
38 
39 //
40 // method implementations
41 //
42 
43 namespace SH
44 {
46  const std::string& pattern,
47  const std::string& samplePattern,
48  const std::string& samplePostfix)
49  {
// NOTE(review): the first line of this signature (embedded listing line 45)
// is absent from this listing, so the function name and the declarations of
// `sh` and `list` are not visible here -- confirm against the real file.
//
// Configures a ScanDir with sample depth 0 and file depth exactly 1
// (minDepth == maxDepth == 1), applies the caller's sample pattern and
// sample postfix, and scans `list` into `sh`.
//
// NOTE(review): the embedded numbering jumps from 53 to 55, and `pattern`
// is not used by any visible line; presumably the omitted call forwards it
// to the scanner -- verify.
50  ScanDir()
51  .sampleDepth (0)
52  .minDepth (1)
53  .maxDepth (1)
55  .samplePattern (samplePattern)
56  .samplePostfix (samplePostfix)
57  .scan (sh, list);
58  }
59 
60 
61 
62  void scanDir (SampleHandler& sh, const std::string& dir)
63  {
64  ScanDir()
65  .sampleDepth (0)
66  .minDepth (1)
67  .maxDepth (1)
68  .scan (sh, dir);
69  }
70 
71 
72 
// Overload of scanDir that additionally receives a `prefix` string.
//
// Uses the same ScanDir configuration as the two-argument overload (sample
// depth 0, file depth exactly 1) but scans an object called `list` rather
// than `dir` directly.
73  void scanDir (SampleHandler& sh, const std::string& dir,
74  const std::string& prefix)
75  {
// NOTE(review): the embedded numbering jumps from 75 to 77 and `list` is not
// declared in any visible line; presumably the omitted line builds the disk
// list from `dir` and `prefix` -- confirm against the real file.
77  ScanDir()
78  .sampleDepth (0)
79  .minDepth (1)
80  .maxDepth (1)
81  .scan (sh, list);
82  }
83 
84 
85 
87  const std::string& pattern)
88  {
// NOTE(review): the first line of this signature (embedded listing line 86)
// is absent from this listing, so the function name and the declarations of
// `sh` and `list` are not visible here -- confirm against the real file.
//
// Configures a ScanDir with sample depth -1 and the sample postfix pattern
// ".*", then scans `list` into `sh`.
//
// NOTE(review): the embedded numbering jumps from 90 to 92, and `pattern`
// is not used by any visible line; presumably the omitted call forwards it
// to the scanner -- verify.
89  ScanDir()
90  .sampleDepth (-1)
92  .samplePostfix (".*")
93  .scan (sh, list);
94  }
95 
96 
97 
98  Sample *makeFromTChain (const std::string& name, const TChain& chain)
99  {
100  std::unique_ptr<SampleLocal> result (new SampleLocal (name));
101  result->meta()->setString (MetaFields::treeName, chain.GetName());
102 
103  TIter chainIter (chain.GetListOfFiles());
104  TChainElement *chainElement = 0;
105  while ((chainElement = dynamic_cast<TChainElement*>(chainIter.Next())) != 0)
106  result->add (chainElement->GetTitle());
107  return result.release();
108  }
109 
110 
111 
// Scan `list` into `sh`, renaming every sample that matches "*" to `name`.
//
// The ScanDir is configured with sample depth 0 before the rename rule is
// installed.
112  void scanSingleDir (SampleHandler& sh, const std::string& name,
113  DiskList& list, const std::string& pattern)
114  {
// NOTE(review): the embedded numbering jumps from 116 to 118, and `pattern`
// is not used by any visible line; presumably the omitted call forwards it
// to the scanner -- confirm against the real file.
115  ScanDir()
116  .sampleDepth (0)
118  .sampleRename ("*", name)
119  .scan (sh, list);
120  }
121 
122 
123 
124  void scanDQ2 (SampleHandler& sh, const std::string& pattern)
125  {
126  if (pattern.find ("*") == std::string::npos)
127  {
128  addGrid (sh, pattern);
129  } else
130  {
131  std::set<std::string> types = {"DATASET", "DIDType.DATASET"};
132  if (pattern.back() == '/')
133  types = {"CONTAINER", "DIDType.CONTAINER"};
134 
135  auto subresult = rucioListDids (pattern);
136  for (auto& ds : subresult)
137  {
138  if (types.find (ds.type) != types.end())
139  addGrid (sh, ds.scope + ":" + ds.name);
140  }
141  }
142  }
143 
144 
145 
146  void scanRucio (SampleHandler& sh, const std::string& pattern,
147  bool alwaysQuery)
148  {
149  if (pattern.find ("*") == std::string::npos && !alwaysQuery)
150  {
151  addGrid (sh, pattern);
152  } else
153  {
154  auto subresult = rucioListDids (pattern);
155  bool added = false;
156  for (std::string type : {"CONTAINER", "DIDType.CONTAINER", "DATASET", "DIDType.DATASET"})
157  {
158  for (auto& ds : subresult)
159  {
160  if (ds.type == type)
161  {
162  addGrid (sh, ds.scope + ":" + ds.name);
163  added = true;
164  }
165  }
166  if (added)
167  return;
168  }
169  RCU_THROW_MSG ("failed to find any datasets matching pattern: " + pattern);
170  }
171  }
172 
173 
174 
175  void addGrid (SampleHandler& sh, const std::string& ds)
176  {
177  RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
178 
179  std::string name;
180  if (ds[ds.size()-1] == '/')
181  name = ds.substr (0, ds.size()-1);
182  else
183  name = ds;
184 
185  auto sample = std::make_unique<SampleGrid> (name);
186  sample->meta()->setString (MetaFields::gridName, ds);
188  sh.add (sample.release());
189  }
190 
191 
192 
// Add one SampleGrid named `dsName` that combines all datasets in `dsList`.
//
// The entries of `dsList` are joined into a single comma-separated string
// (one trailing '/' is removed from each entry) and that string is stored in
// the sample meta-data under MetaFields::gridName.  Each entry is checked
// with RCU_ASSERT_SOFT not to contain '*'.
193  void addGridCombined (SampleHandler& sh, const std::string& dsName,
194  const std::vector<std::string>& dsList)
195  {
196  std::string name;
197  for (const std::string &ds : dsList)
198  {
199  RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
200 
// separate entries with a comma, but do not emit a leading comma
201  if (!name.empty())
202  name.append(",");
203 
// NOTE(review): for an empty entry ds.size() - 1 wraps around, so at()
// throws std::out_of_range -- confirm that empty entries are meant to be
// rejected this way.
204  if (ds.at(ds.size() - 1) == '/')
205  name.append(ds.substr (0, ds.size() - 1));
206  else
207  name.append(ds);
208  }
209 
210  auto sample = std::make_unique<SampleGrid> (dsName);
211  sample->meta()->setString (MetaFields::gridName, name);
// NOTE(review): the embedded listing numbers jump from 211 to 213 here, so a
// statement may be missing from this extract -- confirm against the real file.
// The sample is released from the unique_ptr and handed to sh.add().
213  sh.add (sample.release());
214  }
// Add one SampleGrid named `dsName` that combines the datasets listed in the
// text file `dsFile` (one dataset per line).
//
// Trailing whitespace is stripped from every line; lines that are then empty
// or start with '#' are skipped.  The remaining entries are joined into a
// comma-separated string (one trailing '/' removed from each) which is
// stored in the sample meta-data under MetaFields::gridName.
//
// Throws (via RCU_THROW_MSG) if reading stops before end-of-file is reached.
215  void addGridCombinedFromFile (SampleHandler& sh, const std::string& dsName,
216  const std::string& dsFile)
217  {
218  std::ifstream file (dsFile.c_str());
219 
220  std::string name;
221  std::string ds;
222  const std::set<char> whitespaces{'\t',' ','\n','\r'};
223  while (std::getline (file, ds))
224  {
// strip trailing whitespace (this also removes a '\r' left by CRLF files)
225  while ((!ds.empty()) && whitespaces.count(ds.back())) ds.pop_back();
// skip blank lines and '#' comment lines
226  if (ds.empty() || ds.at(0) == '#')
227  continue;
228 
229  RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
230 
// separate entries with a comma, but do not emit a leading comma
231  if (!name.empty())
232  name.append(",");
233 
// ds is non-empty here, so ds.size() - 1 is a valid index
234  if (ds.at(ds.size() - 1) == '/')
235  name.append(ds.substr (0, ds.size() - 1));
236  else
237  name.append(ds);
238  }
// the read loop must have ended because of end-of-file, not an earlier failure
239  if (!file.eof())
240  RCU_THROW_MSG ("failed to read file: " + dsFile);
241 
242  auto sample = std::make_unique<SampleGrid> (dsName);
243  sample->meta()->setString (MetaFields::gridName, name);
// NOTE(review): the embedded listing numbers jump from 243 to 245 here, so a
// statement may be missing from this extract -- confirm against the real file.
245  sh.add (sample.release());
246  }
247 
248 
249  void makeGridDirect (SampleHandler& sh, const std::string& disk,
250  const std::string& from, const std::string& to,
251  bool allow_partial)
252  {
253  SampleHandler mysh;
254 
255  for (SampleHandler::iterator sample = sh.begin(),
256  end = sh.end(); sample != end; ++ sample)
257  {
258  SampleGrid *grid = dynamic_cast<SampleGrid*>(*sample);
259 
260  if (grid == 0)
261  {
262  mysh.add (*sample);
263  } else
264  {
265  const std::string ds = grid->meta()->castString (MetaFields::gridName);
266  if (ds.empty())
267  RCU_THROW_MSG ("no dataset configured for grid dataset " + ds);
268 
270 
271  std::set<std::string> knownFiles;
272  std::map<std::string,std::string> usedFiles;
273  for (auto& entry : rucioListFileReplicas (ds))
274  {
275  if (RCU::match_expr (pattern, entry.name))
276  {
277  knownFiles.insert (entry.name);
278  if (entry.disk == disk)
279  {
280  std::string url = entry.replica;
281  const auto split = url.find (from);
282  if (split != std::string::npos)
283  url.replace(split, from.size(), to);
284  usedFiles[entry.name] = url;
285  }
286  }
287  }
288 
289  if (usedFiles.empty())
290  {
291  if (allow_partial)
292  RCU_WARN_MSG ("dataset " + ds + " not at " + disk + ", skipped");
293  } else if (knownFiles.size() != usedFiles.size())
294  {
295  if (allow_partial)
296  {
297  RCU_WARN_MSG ("only incomplete version of dataset " + ds + " at " + disk);
298  } else
299  {
300  usedFiles.clear ();
301  }
302  }
303 
304  if (usedFiles.size() == 0)
305  {
306  sh.add (*sample);
307  } else
308  {
309  std::unique_ptr<SampleLocal> mysample
310  (new SampleLocal (grid->name()));
311  *mysample->meta() = *grid->meta();
312 
313  for (const auto& file : usedFiles)
314  {
315  mysample->add (file.second);
316  }
317  mysh.add (mysample.release());
318  }
319  }
320  }
321  swap (sh, mysh);
322  }
323 
324 
325 
327  const std::string& pattern)
328  {
// NOTE(review): the first line of this signature (embedded listing line 326)
// is absent from this listing, so the function name and the declarations of
// `sh` and `sample` are not visible here -- confirm against the real file.
//
// Looks at the keys of the first file of `sample` and adds to `sh` one clone
// of `sample` for every key whose name matches `pattern` and whose object is
// a TTree.  Each clone is named "<sample name>_<tree name>" and gets the
// tree name stored under MetaFields::treeName.
//
// A sample without files is added to `sh` unchanged.  Throws (via
// RCU_THROW_MSG) if the first file cannot be opened.
329  SamplePtr mysample = sample.makeLocal();
330  if (mysample->numFiles() == 0)
331  {
332  sh.add (&sample);
333  return;
334  }
// only the first file (index 0) is opened and inspected
335  std::unique_ptr<TFile> file (TFile::Open (mysample->fileName(0).c_str()));
336  if (!file.get())
337  RCU_THROW_MSG ("could not open file: " + mysample->fileName(0));
338  TObject *object = 0;
339  boost::regex mypattern (pattern);
// iter.Next() yields a null pointer once the key list is exhausted, which
// ends the loop
340  for (TIter iter (file->GetListOfKeys()); (object = iter.Next()); )
341  {
342  if (RCU::match_expr (mypattern, object->GetName()) &&
343  dynamic_cast<TTree*>(file->Get(object->GetName())))
344  {
345  std::string newName = sample.name() + "_" + object->GetName();
// NOTE(review): the result of this dynamic_cast is dereferenced on the next
// line without a null check -- confirm that Clone() always yields a Sample.
346  std::unique_ptr<Sample> newSample
347  (dynamic_cast<Sample*>(sample.Clone (newName.c_str())));
348  newSample->name (newName);
349  newSample->meta()->setString (MetaFields::treeName, object->GetName());
350  sh.add (newSample.release());
351  }
352  }
353  }
354 
355 
356 
357  void scanForTrees (SampleHandler& sh, const std::string& pattern)
358  {
359  SH::SampleHandler sh_new;
360 
361  for (SampleHandler::iterator sample = sh.begin(),
362  end = sh.end(); sample != end; ++ sample)
363  {
364  scanForTrees (sh_new, **sample, pattern);
365  }
366  swap (sh, sh_new);
367  }
368 
369 
370 
371  void readFileList (SampleHandler& sh, const std::string& name,
372  const std::string& file)
373  {
374  std::ifstream myfile (file.c_str());
375 
376  auto sample = std::make_unique<SampleLocal> (name);
377  std::string line;
378  const std::set<char> whitespaces{'\t',' ','\n','\r'};
379  while (std::getline (myfile, line))
380  {
381  while ((!line.empty()) && whitespaces.count(line.back())) line.pop_back();
382  if (!line.empty() && line.at(0) != '#')
383  {
384  sample->add (line);
385  }
386  }
387  if (!myfile.eof())
388  RCU_THROW_MSG ("failed to read file: " + file);
389  sh.add (sample.release());
390  }
391 }
mergePhysValFiles.pattern
pattern
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:26
SH::ScanDir
the class used for scanning local directories and file servers for samples
Definition: ScanDir.h:38
SH::ScanDir::sampleRename
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
Definition: ScanDir.cxx:149
SH::Sample::numFiles
std::size_t numFiles() const
the number of files in the sample
checkxAOD.ds
ds
Definition: Tools/PyUtils/bin/checkxAOD.py:257
SH::SampleHandler::iterator
std::vector< Sample * >::const_iterator iterator
the iterator to use
Definition: SampleHandler.h:475
checkFileSG.line
line
Definition: checkFileSG.py:75
DiskListLocal.h
get_generator_info.result
result
Definition: get_generator_info.py:21
SH::ScanDir::maxDepth
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
Definition: ScanDir.cxx:85
ScanDir.h
runLayerRecalibration.chain
chain
Definition: runLayerRecalibration.py:175
SH::SampleHandler::add
void add(Sample *sample)
add a sample to the handler
SH::rucioListDids
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset
Definition: GridTools.cxx:348
SH::MetaFields::gridFilter_default
static const std::string gridFilter_default
the default value for gridFilter
Definition: MetaFields.h:41
SH::scanDir
void scanDir(SampleHandler &sh, DiskList &list, const std::string &pattern, const std::string &samplePattern, const std::string &samplePostfix)
effects: scan the given directory and add all subdirectories as samples that contain root files.
Definition: ToolsDiscovery.cxx:45
SH::scanDQ2
void scanDQ2(SampleHandler &sh, const std::string &pattern)
effects: make a list from DQ2 using the given pattern guarantee: basic, may add partially failures: o...
Definition: ToolsDiscovery.cxx:124
SH::rucioListFileReplicas
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset
Definition: GridTools.cxx:384
SampleHandler.h
ToolsDiscovery.h
SH::scanFiles
void scanFiles(SampleHandler &sh, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and make a separate sample for each file (using the file name ...
Definition: ToolsDiscovery.cxx:86
Assert.h
taskman.dsName
dsName
Definition: taskman.py:292
StringUtil.h
physics_parameters.url
string url
Definition: physics_parameters.py:27
SH::ScanDir::samplePattern
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
Definition: ScanDir.cxx:130
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
PrepareReferenceFile.regex
regex
Definition: PrepareReferenceFile.py:43
SamplePtr.h
SampleLocal.h
MetaObject.h
RCU_WARN_MSG
#define RCU_WARN_MSG(message)
Definition: PrintMsg.h:52
SH::ScanDir::samplePostfix
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
Definition: ScanDir.cxx:139
FullCPAlgorithmsTest_eljob.sample
sample
Definition: FullCPAlgorithmsTest_eljob.py:100
RCU_ASSERT_SOFT
#define RCU_ASSERT_SOFT(x)
Definition: Assert.h:167
SH::ScanDir::sampleDepth
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition: ScanDir.cxx:47
RCU::Shell
Definition: ShellExec.cxx:28
RCU::match_expr
bool match_expr(const boost::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
Definition: StringUtil.cxx:40
SH::DiskList
an interface for listing directory contents, locally or on a file server
Definition: DiskList.h:32
GridTools.h
SH::makeFromTChain
Sample * makeFromTChain(const std::string &name, const TChain &chain)
effects: create a sample with the given name from the given TChain object guarantee: strong failures:...
Definition: ToolsDiscovery.cxx:98
checkCorrelInHIST.prefix
dictionary prefix
Definition: checkCorrelInHIST.py:391
file
TFile * file
Definition: tile_monitor.h:29
SH::SampleGrid
This class implements a Sample located on the grid.
Definition: SampleGrid.h:44
SH::scanRucio
void scanRucio(SampleHandler &sh, const std::string &pattern, bool alwaysQuery)
make a list of grid datasets using the given pattern
Definition: ToolsDiscovery.cxx:146
SH::scanSingleDir
void scanSingleDir(SampleHandler &sh, const std::string &name, DiskList &list, const std::string &pattern)
effects: scan the given directory tree and turn it into a single sample of the given name guarantee: ...
Definition: ToolsDiscovery.cxx:112
SH::MetaFields::gridFilter
static const std::string gridFilter
the field containing the file filter for the dataset on the grid
Definition: MetaFields.h:38
SH::MetaObject::castString
std::string castString(const std::string &name, const std::string &def_val="", CastMode mode=CAST_ERROR_THROW) const
the meta-data string with the given name
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
SH::Sample::meta
MetaObject * meta()
the meta-information for this sample
SH::addGrid
void addGrid(SampleHandler &sh, const std::string &ds)
effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds....
Definition: ToolsDiscovery.cxx:175
SH::MetaFields::treeName
static const std::string treeName
the name of the tree in the sample
Definition: MetaFields.h:52
WriteCalibToCool.swap
swap
Definition: WriteCalibToCool.py:94
SH::Sample::name
const std::string & name() const
the name of the sample we are using
SH::Sample
a base class that manages a set of files belonging to a particular data set and the associated meta-d...
Definition: Sample.h:54
SH::MetaObject::setString
void setString(const std::string &name, const std::string &value)
set the meta-data string with the given name
beamspotman.dir
string dir
Definition: beamspotman.py:623
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
SH::addGridCombined
void addGridCombined(SampleHandler &sh, const std::string &dsName, const std::vector< std::string > &dsList)
effects: add a combined grid dataset with name dsName for dataset list dsList guarantee: strong failu...
Definition: ToolsDiscovery.cxx:193
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:192
MetaFields.h
ThrowMsg.h
MakeNewFileFromOldAndSubstitution.newName
dictionary newName
Definition: ICHEP2016/MakeNewFileFromOldAndSubstitution.py:95
SH::SamplePtr
A smart pointer class that holds a single Sample object.
Definition: SamplePtr.h:35
SH::DiskListLocal
a DiskList implementation for local directories
Definition: DiskListLocal.h:27
SH::Sample::fileName
std::string fileName(std::size_t index) const
the name of the file with the given index
SampleGrid.h
CxxUtils::to
CONT to(RANGE &&r)
Definition: ranges.h:32
SH::MetaFields::gridName
static const std::string gridName
the field containing the name of the dataset on the grid
Definition: MetaFields.h:34
SH::ScanDir::scan
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
Definition: ScanDir.cxx:168
SH::makeGridDirect
void makeGridDirect(SampleHandler &sh, const std::string &disk, const std::string &from, const std::string &to, bool allow_partial)
effects: update all grid samples in the sample handler that are located on the given disk to be opene...
Definition: ToolsDiscovery.cxx:249
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
SH::SampleLocal
A Sample based on a simple file list.
Definition: SampleLocal.h:38
SH::scanForTrees
void scanForTrees(SampleHandler &sh, Sample &sample, const std::string &pattern)
effects: scan for trees in the given sample (or sample handler), and create a separate sample for eac...
Definition: ToolsDiscovery.cxx:326
pickleTool.object
object
Definition: pickleTool.py:30
SH::ScanDir::filePattern
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted
Definition: ScanDir.cxx:94
SH::SampleHandler
A class that manages a list of Sample objects.
Definition: SampleHandler.h:60
SH
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition: PrunDriver.h:15
RCU_THROW_MSG
#define RCU_THROW_MSG(message)
Definition: PrintMsg.h:58
SH::addGridCombinedFromFile
void addGridCombinedFromFile(SampleHandler &sh, const std::string &dsName, const std::string &dsFile)
effects: add a combined grid dataset with name dsName for dataset list file dsFile guarantee: strong ...
Definition: ToolsDiscovery.cxx:215
SH::SampleLocal::add
void add(const std::string &file)
add a file to the list
RCU::glob_to_regexp
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
Definition: StringUtil.cxx:56
Trk::split
@ split
Definition: LayerMaterialProperties.h:38
SH::readFileList
void readFileList(SampleHandler &sh, const std::string &name, const std::string &file)
effects: read a file list from a text file guarantee: strong failures: out of memory III failures: i/...
Definition: ToolsDiscovery.cxx:371
SH::ScanDir::minDepth
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
Definition: ScanDir.cxx:76