ATLAS Offline Software
Loading...
Searching...
No Matches
SH Namespace Reference

This module provides a lot of global definitions, forward declarations and includes that are used by all modules. More...

Classes

class  DiskList
 an interface for listing directory contents, locally or on a file server More...
class  DiskListEOS
 a DiskList implementation for EOS More...
class  DiskListLocal
 a DiskList implementation for local directories More...
class  DiskListSRM
 a DiskList implementation for the SRM protocol More...
class  DiskListXRD
 a DiskList implementation for the XRD protocol More...
class  DiskOutput
 a class/interface representing an output location for files More...
class  DiskOutputLocal
 an implementation of DiskOutput for local disks More...
class  DiskOutputXRD
 an implementation of DiskOutput using the XRD protocol More...
class  DiskWriter
 an interface that manages writing a single output file More...
class  DiskWriterLocal
 an implementation of DiskWriter for local files More...
class  DiskWriterXRD
 an implementation of DiskWriter for the XRD protocol More...
class  Meta
 A base class for classes that implement arbitrary meta-data. More...
class  MetaData< T >
 A class implementing a templatized version of the meta-data. More...
struct  MetaDataQuery
 an object containing the result of an AMI meta-data query More...
struct  MetaDataSample
 all the meta-data fields that we may try to read from AMI. More...
struct  MetaFields
 This class contains the known meta-data names. More...
struct  MetaNames
 the names of common meta-data More...
class  MetaObject
 A class that manages meta-data to be associated with an object. More...
class  MetaVector
 This class defines a templatized version of the meta-data in vector form. More...
struct  RucioDownloadResult
 the result from rucio_download More...
struct  RucioListDidsEntry
 one entry from the rucio-list-dids command More...
struct  RucioListFileReplicasEntry
 one entry from the rucio-list-file-replicas command More...
class  Sample
 a base class that manages a set of files belonging to a particular data set and the associated meta-data. More...
class  SampleComposite
 This module defines an implementation of Sample that contains composite samples. More...
class  SampleGrid
 This class implements a Sample located on the grid. More...
class  SampleHandler
 A class that manages a list of Sample objects. More...
class  SampleHist
 A sample that represents a single histogram file. More...
class  SampleLocal
 A Sample based on a simple file list. More...
class  SampleMeta
 A Sample that consists only of Meta-Information. More...
class  SamplePtr
 A smart pointer class that holds a single Sample object. More...
struct  ScanDir
 the class used for scanning local directories and file servers for samples More...
class  TagList
 A class that manages a list of tags for identifying samples. More...

Functions

void fetchMetaData (MetaDataQuery &query)
 effects: fetch information on all the samples/datasets specified guarantee: basic
void fetchMetaData (SampleHandler &sh, bool override)
 effects: fetch information on all the samples in the sample handler.
const std::string & downloadStageEnvVar ()
 the name of the environment variable containing the directory for staging files from the grid
bool checkVomsProxy ()
 return whether we have a valid VOMS proxy available
void ensureVomsProxy ()
 ensure that we have a valid VOMS proxy available
std::vector< std::string > faxListFilesGlob (const std::string &name, const std::string &filter)
 list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as glob expression)
std::vector< std::string > faxListFilesRegex (const std::string &name, const std::string &filter)
 list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as regular expression)
std::vector< std::string > rucioDirectAccessGlob (const std::string &name, const std::string &filter, const std::string &selectOptions)
 list the rucio URLs for all the files in the dataset or dataset container matching the given filter (as glob expression)
std::vector< std::string > rucioDirectAccessRegex (const std::string &name, const std::string &filter, const std::string &selectOptions)
 list the rucio URLs for all the files in the dataset or dataset container matching the given filter (as regular expression)
std::vector< RucioListDidsEntry > rucioListDids (const std::string &dataset)
 run rucio-list-dids for the given dataset
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas (const std::string &dataset)
 run rucio-list-file-replicas for the given dataset
std::map< std::string, std::unique_ptr< MetaObject > > rucioGetMetadata (const std::set< std::string > &datasets)
 run rucio-get-metadata for the given list of datasets
RucioDownloadResult rucioDownload (const std::string &location, const std::string &dataset)
 run rucio-download
std::vector< RucioDownloadResult > rucioDownloadList (const std::string &location, const std::vector< std::string > &datasets)
 run rucio-download with multiple datasets
std::vector< std::string > rucioCacheDatasetGlob (const std::string &location, const std::string &dataset, const std::string &fileGlob)
 download the dataset, and return a list matching the pattern
std::string dbg (const Meta &, unsigned)
std::string dbg (const TagList &obj, unsigned)
void scanDir (SampleHandler &sh, DiskList &list, const std::string &pattern="*.root*", const std::string &samplePattern="*", const std::string &samplePostfix="")
 effects: scan the given directory and add all subdirectories as samples that contain root files.
void scanDir (SampleHandler &sh, const std::string &dir)
void scanDir (SampleHandler &sh, const std::string &dir, const std::string &prefix)
void scanFiles (SampleHandler &sh, DiskList &list, const std::string &pattern="*.root*")
 effects: scan the given directory tree and make a separate sample for each file (using the file name before any "." as the sample name).
Sample * makeFromTChain (const std::string &name, const TChain &chain)
 effects: create a sample with the given name from the given TChain object guarantee: strong failures: out of memory II
void scanSingleDir (SampleHandler &sh, const std::string &name, DiskList &list, const std::string &pattern="*.root*")
 effects: scan the given directory tree and turn it into a single sample of the given name guarantee: strong failures: out of memory III
void scanDQ2 (SampleHandler &sh, const std::string &pattern)
 effects: make a list from DQ2 using the given pattern guarantee: basic, may add partially failures: out of memory III failures: dq2-ls errors
void scanRucio (SampleHandler &sh, const std::string &pattern, bool alwaysQuery=false)
 make a list of grid datasets using the given pattern
void addGrid (SampleHandler &sh, const std::string &ds)
 effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds.find ("*") == std::string::npos
void addGridCombined (SampleHandler &sh, const std::string &dsName, const std::vector< std::string > &dsList)
 effects: add a combined grid dataset with name dsName for dataset list dsList guarantee: strong failures: out of memory II requires: for each dataset ds: ds.find ("*") == std::string::npos
void addGridCombinedFromFile (SampleHandler &sh, const std::string &dsName, const std::string &dsFile)
 effects: add a combined grid dataset with name dsName for dataset list file dsFile guarantee: strong failures: out of memory II requires: for each dataset ds: ds.find ("*") == std::string::npos
void makeGridDirect (SampleHandler &sh, const std::string &disk, const std::string &from, const std::string &to, bool allow_partial)
 effects: update all grid samples in the sample handler that are located on the given disk to be opened directly.
void scanForTrees (SampleHandler &sh, Sample &sample, const std::string &pattern=".*")
 effects: scan for trees in the given sample (or sample handler), and create a separate sample for each tree.
void scanForTrees (SampleHandler &sh, const std::string &pattern)
void readFileList (SampleHandler &sh, const std::string &name, const std::string &file)
 effects: read a file list from a text file guarantee: strong failures: out of memory III failures: i/o errors
void printDuplicateEvents (const Sample &sample)
 effects: check the given sample for duplicate events and then print them out guarantee: basic, may print partially failures: out of memory III failures: i/o errors
void printDuplicateEventsSplit (const SampleHandler &sh)
 effects: check each sample for duplicate events and then print them out guarantee: basic, may print partially failures: out of memory III failures: i/o errors
void printDuplicateEventsJoint (const SampleHandler &sh)
 effects: check for duplicate events between all the samples and then print them out guarantee: basic, may print partially failures: out of memory III failures: i/o errors
void mergeSamples (SampleHandler &sh, const std::string &sampleName, const std::string &pattern)
 effects: remove all samples matching the name pattern, and join them into a single sample named sampleName guarantee: strong failures: out of memory II failures: i/o errors
void readSusyMeta (const SampleHandler &sh, const std::string &inputFile)
 effects: read the susy-meta-data file and add its information to the samples from the given sample handler guarantee: basic failures: i/o errors warning: there are no checks for duplicate and inconsistent sample definitions
void readSusyMetaDir (const SampleHandler &sh, const std::string &inputDir)
 effects: read an entire directory of susy-meta-data files and add their information to the samples from the given sample handler.
std::unique_ptr< TFile > openFile (const std::string &name, const MetaObject &options)
 open a file with the given options
std::unique_ptr< SampleLocal > mergeFiles (const Sample &sample, const std::string &location, bool overwrite)
 merge all the files in the sample into a single file in the given location
SampleHandler mergeFiles (const SampleHandler &sh, const std::string &location, bool overwrite)
 merge all the files in the sample handler into a single file per sample in the given location
void scanNEvents (SampleHandler &sh)
 effects: scan each sample in the sample handler and store the number of entries per file in the meta-data guarantee: basic, may only scan some failures: out of memory failures: read errors failures: invalid sample type
void scanNEvents (Sample &sample)
 effects: scan the given sample and store the number of entries per file in the meta-data guarantee: strong failures: out of memory failures: read errors failures: invalid sample type
SampleHandler splitSample (Sample &sample, Long64_t nevt)
 effects: split the given sample into a set of samples, with each sample containing either exactly one file or at most nevt events side effects: if scanNEvents hasn't been run on this sample, run it.
std::string dbg (const Meta &obj, unsigned verbosity=0)
 the debugging info of this object
std::string dbg (const MetaObject &obj, unsigned verbosity=0)
 the debugging info of this object
void swap (MetaObject &a, MetaObject &b)
 standard swap
std::string dbg (const Sample &obj, unsigned verbosity=0)
 the debugging info of this object
std::string dbg (const SampleHandler &obj, unsigned verbosity=0)
 the debugging info of this object
void swap (SampleHandler &a, SampleHandler &b)
 standard swap
std::string dbg (const TagList &obj, unsigned verbosity=0)
 the debugging info of this object

Detailed Description

This module provides a lot of global definitions, forward declarations and includes that are used by all modules.

This module defines utility functions used for splitting samples.

This module defines utility functions used for joining samples.

Author
Alexander Madsen
Nils Krumnack
Nils Krumnack

As such it doesn't fall into the user vs. expert classification. This module is considered to be in the pre-alpha stage.

The interface provided in this module is intended for the general user. The module is considered to be in the pre-alpha stage.

Function Documentation

◆ addGrid()

void SH::addGrid ( SampleHandler & sh,
const std::string & ds )

effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds.find ("*") == std::string::npos

Definition at line 175 of file ToolsDiscovery.cxx.

176 {
177 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
178
179 std::string name;
180 if (ds[ds.size()-1] == '/')
181 name = ds.substr (0, ds.size()-1);
182 else
183 name = ds;
184
185 auto sample = std::make_unique<SampleGrid> (name);
186 sample->meta()->setString (MetaFields::gridName, ds);
187 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
188 sh.add (sample.release());
189 }
#define RCU_ASSERT_SOFT(x)
Definition Assert.h:167
static const std::string gridFilter
the field containing the file filter for the dataset on the grid
Definition MetaFields.h:38
static const std::string gridName
the field containing the name of the dataset on the grid
Definition MetaFields.h:34
static const std::string gridFilter_default
the default value for gridFilter
Definition MetaFields.h:41

◆ addGridCombined()

void SH::addGridCombined ( SampleHandler & sh,
const std::string & dsName,
const std::vector< std::string > & dsList )

effects: add a combined grid dataset with name dsName for dataset list dsList guarantee: strong failures: out of memory II requires: for each dataset ds: ds.find ("*") == std::string::npos

Definition at line 193 of file ToolsDiscovery.cxx.

195 {
196 std::string name;
197 for (const std::string &ds : dsList)
198 {
199 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
200
201 if (!name.empty())
202 name.append(",");
203
204 if (ds.at(ds.size() - 1) == '/')
205 name.append(ds.substr (0, ds.size() - 1));
206 else
207 name.append(ds);
208 }
209
210 auto sample = std::make_unique<SampleGrid> (dsName);
211 sample->meta()->setString (MetaFields::gridName, name);
212 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
213 sh.add (sample.release());
214 }

◆ addGridCombinedFromFile()

void SH::addGridCombinedFromFile ( SampleHandler & sh,
const std::string & dsName,
const std::string & dsFile )

effects: add a combined grid dataset with name dsName for dataset list file dsFile guarantee: strong failures: out of memory II requires: for each dataset ds: ds.find ("*") == std::string::npos

Definition at line 215 of file ToolsDiscovery.cxx.

217 {
218 std::ifstream file (dsFile.c_str());
219
220 std::string name;
221 std::string ds;
222 const std::set<char> whitespaces{'\t',' ','\n','\r'};
223 while (std::getline (file, ds))
224 {
225 while ((!ds.empty()) && whitespaces.count(ds.back())) ds.pop_back();
226 if (ds.empty() || ds.at(0) == '#')
227 continue;
228
229 RCU_ASSERT_SOFT (ds.find ("*") == std::string::npos);
230
231 if (!name.empty())
232 name.append(",");
233
234 if (ds.at(ds.size() - 1) == '/')
235 name.append(ds.substr (0, ds.size() - 1));
236 else
237 name.append(ds);
238 }
239 if (!file.eof())
240 RCU_THROW_MSG ("failed to read file: " + dsFile);
241
242 auto sample = std::make_unique<SampleGrid> (dsName);
243 sample->meta()->setString (MetaFields::gridName, name);
244 sample->meta()->setString (MetaFields::gridFilter, MetaFields::gridFilter_default);
245 sh.add (sample.release());
246 }
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:58
TFile * file

◆ checkVomsProxy()

bool SH::checkVomsProxy ( )

return whether we have a valid VOMS proxy available

Guarantee
basic
Failures
grid utility failures

Definition at line 206 of file GridTools.cxx.

207 {
208 return proxyData().checkVomsProxy();
209 }

◆ dbg() [1/7]

std::string SH::dbg ( const Meta & ,
unsigned  )
related

Definition at line 28 of file Meta.cxx.

29 {
30 return "meta-object";
31 }

◆ dbg() [2/7]

std::string SH::dbg ( const TagList & obj,
unsigned  )
related

Definition at line 30 of file TagList.cxx.

31 {
32 std::string result;
33
34 for (TagList::iterator tag = obj.begin(), end = obj.end();
35 tag != end; ++ tag)
36 {
37 if (!result.empty())
38 result += ",";
39 result += *tag;
40 };
41 return "(" + result + ")";
42 }
std::set< std::string >::const_iterator iterator
the iterator to use
Definition TagList.h:96

◆ downloadStageEnvVar()

const std::string & SH::downloadStageEnvVar ( )

the name of the environment variable containing the directory for staging files from the grid

Definition at line 198 of file GridTools.cxx.

199 {
200 static const std::string result = "SAMPLEHANDLER_RUCIO_DOWNLOAD";
201 return result;
202 }

◆ ensureVomsProxy()

void SH::ensureVomsProxy ( )

ensure that we have a valid VOMS proxy available

First this checks whether we have a valid PROXY, and if not it sets up a new certificate.

Guarantee
basic
Failures
grid utility failures
failure to set up new VOMS proxy

Definition at line 213 of file GridTools.cxx.

214 {
215 proxyData().ensureVomsProxy();
216 }

◆ faxListFilesGlob()

std::vector< std::string > SH::faxListFilesGlob ( const std::string & name,
const std::string & filter )

list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as glob expression)

Guarantee
strong
Failures
grid utility failures
Precondition
!name.empty()
name.find('*') == std::string::npos
!filter.empty()

Definition at line 221 of file GridTools.cxx.

222 {
223#pragma GCC diagnostic push
224#pragma GCC diagnostic ignored "-Wpragmas"
225#pragma GCC diagnostic ignored "-Wunknown-pragmas"
226#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
227 return faxListFilesRegex (name, RCU::glob_to_regexp (filter));
228#pragma GCC diagnostic pop
229 }
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
std::vector< std::string > faxListFilesRegex(const std::string &name, const std::string &filter)
list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as...

◆ faxListFilesRegex()

std::vector< std::string > SH::faxListFilesRegex ( const std::string & name,
const std::string & filter )

list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as regular expression)

Guarantee
strong
Failures
grid utility failures
Precondition
!name.empty()
name.find('*') == std::string::npos
!filter.empty()

Definition at line 234 of file GridTools.cxx.

235 {
236 RCU_REQUIRE_SOFT (!name.empty());
237 RCU_REQUIRE_SOFT (name.find('*') == std::string::npos);
238 RCU_REQUIRE_SOFT (!filter.empty());
239
240 ensureVomsProxy ();
241
242 static const std::string separator = "------- SampleHandler Split -------";
243 std::vector<std::string> result;
244
245 ANA_MSG_INFO ("querying FAX for dataset " << name);
246 std::string output = sh::exec_read ("source $ATLAS_LOCAL_ROOT_BASE/user/atlasLocalSetup.sh -q && lsetup --force fax && echo " + separator + " && fax-get-gLFNs " + sh::quote (name));
247 auto split = output.rfind (separator + "\n");
248 if (split == std::string::npos)
249 RCU_THROW_MSG ("couldn't find separator in: " + output);
250
251 std::istringstream str (output.substr (split + separator.size() + 1));
252 std::regex pattern (filter);
253 std::string line;
254 while (std::getline (str, line))
255 {
256 if (!line.empty())
257 {
258 if (!line.starts_with ("root:"))
259 RCU_THROW_MSG ("couldn't parse line: " + line);
260
261 std::string::size_type split1 = line.rfind (":");
262 std::string::size_type split2 = line.rfind ("/");
263 if (split1 < split2)
264 split1 = split2;
265 if (split1 != std::string::npos)
266 {
267 if (RCU::match_expr (pattern, line.substr (split1+1)))
268 result.push_back (line);
269 } else
270 RCU_THROW_MSG ("couldn't parse line: " + line);
271 }
272 }
273 if (result.size() == 0)
274 RCU_WARN_MSG ("dataset " + name + " did not contain any files. this is likely not right");
275 return result;
276 }
#define RCU_REQUIRE_SOFT(x)
Definition Assert.h:153
#define ANA_MSG_INFO(xmsg)
Macro printing info messages.
#define RCU_WARN_MSG(message)
Definition PrintMsg.h:52
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
std::string exec_read(const std::string &cmd)
effects: execute the given command and return the output returns: the output of the command guarantee...
Definition ShellExec.cxx:37
std::string quote(const std::string &name)
effects: quote the given name to protect it from the shell returns: the quoted name guarantee: strong...
Definition ShellExec.cxx:75
bool match_expr(const std::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
void ensureVomsProxy()
ensure that we have a valid VOMS proxy available

◆ fetchMetaData() [1/2]

void SH::fetchMetaData ( MetaDataQuery & query)

effects: fetch information on all the samples/datasets specified guarantee: basic

Definition at line 31 of file fetch.cxx.

32 {
33 static std::once_flag loaded;
34 auto do_load = []() {
35 TString path = "$ROOTCOREBIN/python/SampleHandler/SampleHandler_QueryAMI.py";
36 gSystem->ExpandPathName (path);
37 TPython::LoadMacro (path.Data());
38 };
39 std::call_once (loaded, do_load);
40
41 std::ostringstream command;
42#if ROOT_VERSION_CODE >= ROOT_VERSION(6,33,01)
43 command << "_anyresult = ";
44#endif
45 command << "SampleHandler_QueryAmi([";
46 for (std::size_t iter = 0, end = query.samples.size(); iter != end; ++ iter)
47 {
48 if (iter != 0)
49 command << ", ";
50 command << "'" << query.samples[iter].name << "'";
51 }
52 command << "])";
53#if ROOT_VERSION_CODE >= ROOT_VERSION(6,33,01)
54 std::any result;
55 TPython::Exec (command.str().c_str(), &result);
56 query = std::any_cast<MetaDataQuery>(result);
57#else
58 MetaDataQuery* myquery = static_cast<MetaDataQuery*>
59 ((void*) TPython::Eval (command.str().c_str()));
60 query = *myquery;
61#endif
62 }
query
Definition index.py:72
Definition query.py:1
an object containing the result of an AMI meta-data query

◆ fetchMetaData() [2/2]

void SH::fetchMetaData ( SampleHandler & sh,
bool override )

effects: fetch information on all the samples in the sample handler.

If override is specified, it will override the existing meta-data fields; otherwise the fetched values will only be used if the given fields do not yet exist. guarantee: basic

Definition at line 65 of file fetch.cxx.

66 {
67 std::vector<SH::Sample*> samples;
68 // typedef std::vector<SH::Sample*> SamplesIter;
69 MetaDataQuery query;
70 for (SH::SampleHandler::iterator sample = sh.begin(),
71 end = sh.end(); sample != end; ++ sample)
72 {
73 std::string name = (*sample)->meta()->castString (SH::MetaFields::gridName,(*sample)->name());
74 query.samples.push_back (MetaDataSample (name));
75 samples.push_back (&**sample);
76 }
77 fetchMetaData (query);
78
79 if (!query.messages.empty())
80 RCU_PRINT_MSG (query.messages);
81 for (std::size_t iter = 0, end = query.samples.size(); iter != end; ++ iter)
82 {
83 if (query.samples[iter].unknown)
84 {
85 RCU_WARN_MSG ("failed to find sample " + query.samples[iter].name);
86 } else
87 {
88 RCU_ASSERT (iter != samples.size());
89 SH::Sample *sample = samples[iter];
90
91 if (!override)
92 {
93 query.samples[iter].isData = sample->meta()->castDouble (SH::MetaFields::isData, query.samples[iter].isData);
94 query.samples[iter].luminosity = sample->meta()->castDouble (SH::MetaFields::lumi, query.samples[iter].luminosity);
95 query.samples[iter].crossSection = sample->meta()->castDouble (SH::MetaFields::crossSection, query.samples[iter].crossSection);
96 query.samples[iter].nevents = sample->meta()->castDouble (SH::MetaFields::numEvents, query.samples[iter].nevents);
97 query.samples[iter].kfactor = sample->meta()->castDouble (SH::MetaFields::kfactor, query.samples[iter].kfactor);
98 query.samples[iter].filterEfficiency = sample->meta()->castDouble (SH::MetaFields::filterEfficiency, query.samples[iter].filterEfficiency);
99 }
100 if (query.samples[iter].isData != -1)
101 sample->meta()->setDouble (SH::MetaFields::isData, query.samples[iter].isData);
102 if (query.samples[iter].luminosity != -1)
103 sample->meta()->setDouble (SH::MetaFields::lumi, query.samples[iter].luminosity);
104 if (query.samples[iter].crossSection != -1)
105 sample->meta()->setDouble (SH::MetaFields::crossSection, query.samples[iter].crossSection);
106 if (query.samples[iter].nevents != -1)
107 sample->meta()->setDouble (SH::MetaFields::numEvents, query.samples[iter].nevents);
108 if (query.samples[iter].kfactor != -1)
109 sample->meta()->setDouble (SH::MetaFields::kfactor, query.samples[iter].kfactor);
110 if (query.samples[iter].filterEfficiency != -1)
111 sample->meta()->setDouble (SH::MetaFields::filterEfficiency, query.samples[iter].filterEfficiency);
112 }
113 }
114 }
#define RCU_ASSERT(x)
Definition Assert.h:222
#define RCU_PRINT_MSG(message)
Definition PrintMsg.h:49
std::vector< Sample * >::const_iterator iterator
the iterator to use
void fetchMetaData(MetaDataQuery &query)
effects: fetch information on all the samples/datasets specified guarantee: basic
Definition fetch.cxx:31
all the meta-data fields that we may try to read from AMI.
static const std::string kfactor
the k-factor of the sample
Definition MetaFields.h:82
static const std::string numEvents
the number of events
Definition MetaFields.h:64
static const std::string isData
whether the sample is data
Definition MetaFields.h:79
static const std::string lumi
the luminosity of the sample
Definition MetaFields.h:76
static const std::string crossSection
the cross section field
Definition MetaFields.h:58
static const std::string filterEfficiency
the filter efficiency of the sample
Definition MetaFields.h:85

◆ makeFromTChain()

Sample * SH::makeFromTChain ( const std::string & name,
const TChain & chain )

effects: create a sample with the given name from the given TChain object guarantee: strong failures: out of memory II

Definition at line 98 of file ToolsDiscovery.cxx.

99 {
100 std::unique_ptr<SampleLocal> result (new SampleLocal (name));
101 result->meta()->setString (MetaFields::treeName, chain.GetName());
102
103 TIter chainIter (chain.GetListOfFiles());
104 TChainElement *chainElement = 0;
105 while ((chainElement = dynamic_cast<TChainElement*>(chainIter.Next())) != 0)
106 result->add (chainElement->GetTitle());
107 return result.release();
108 }
A Sample based on a simple file list.
Definition SampleLocal.h:38
static const std::string treeName
the name of the tree in the sample
Definition MetaFields.h:52

◆ makeGridDirect()

void SH::makeGridDirect ( SampleHandler & sh,
const std::string & disk,
const std::string & from,
const std::string & to,
bool allow_partial )

effects: update all grid samples in the sample handler that are located on the given disk to be opened directly.

For that, it replaces the from part of the file name with the to part. If allow_partial is set, it allows for partial datasets (meaning you may lose some files that are not there). guarantee: basic, may convert only some samples. failures: out of memory III. failures: dq2-ls errors. failures: from not part of file name.

Definition at line 249 of file ToolsDiscovery.cxx.

252 {
253 SampleHandler mysh;
254
255 for (SampleHandler::iterator sample = sh.begin(),
256 end = sh.end(); sample != end; ++ sample)
257 {
258 SampleGrid *grid = dynamic_cast<SampleGrid*>(*sample);
259
260 if (grid == 0)
261 {
262 mysh.add (*sample);
263 } else
264 {
265 const std::string ds = grid->meta()->castString (MetaFields::gridName);
266 if (ds.empty())
267 RCU_THROW_MSG ("no dataset configured for grid dataset " + ds);
268
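269 const std::regex pattern (RCU::glob_to_regexp (grid->meta()->castString (MetaFields::gridFilter, MetaFields::gridFilter_default)));
270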
271 std::set<std::string> knownFiles;
272 std::map<std::string,std::string> usedFiles;
273 for (auto& entry : rucioListFileReplicas (ds))
274 {
275 if (RCU::match_expr (pattern, entry.name))
276 {
277 knownFiles.insert (entry.name);
278 if (entry.disk == disk)
279 {
280 std::string url = entry.replica;
281 const auto split = url.find (from);
282 if (split != std::string::npos)
283 url.replace(split, from.size(), to);
284 usedFiles[entry.name] = url;
285 }
286 }
287 }
288
289 if (usedFiles.empty())
290 {
291 if (allow_partial)
292 RCU_WARN_MSG ("dataset " + ds + " not at " + disk + ", skipped");
293 } else if (knownFiles.size() != usedFiles.size())
294 {
295 if (allow_partial)
296 {
297 RCU_WARN_MSG ("only incomplete version of dataset " + ds + " at " + disk);
298 } else
299 {
300 usedFiles.clear ();
301 }
302 }
303
304 if (usedFiles.size() == 0)
305 {
306 sh.add (*sample);
307 } else
308 {
309 std::unique_ptr<SampleLocal> mysample
310 (new SampleLocal (grid->name()));
311 *mysample->meta() = *grid->meta();
312
313 for (const auto& file : usedFiles)
314 {
315 mysample->add (file.second);
316 }
317 mysh.add (mysample.release());
318 }
319 }
320 }
321 swap (sh, mysh);
322 }
void swap(MetaObject &a, MetaObject &b)
standard swap
std::string castString(const std::string &name, const std::string &def_val="", CastMode mode=CAST_ERROR_THROW) const
the meta-data string with the given name
This class implements a Sample located on the grid.
Definition SampleGrid.h:44
A class that manages a list of Sample objects.
void add(Sample *sample)
add a sample to the handler
MetaObject * meta()
the meta-information for this sample
const std::string & name() const
the name of the sample we are using
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset

◆ mergeFiles() [1/2]

std::unique_ptr< SampleLocal > SH::mergeFiles ( const Sample & sample,
const std::string & location,
bool overwrite )

merge all the files in the sample into a single file in the given location

This is mostly meant to allow merging histogram files (particularly in EventLoop), but in some cases it can also be used to merge n-tuple files together.

Returns
a sample containing the merged file
Guarantee
basic
Failures
i/o errors
out of memory III

Definition at line 73 of file ToolsOther.cxx.

75 {
76 std::unique_ptr<SampleLocal> result (new SampleLocal (sample.name()));
77 result->add (location);
78 *result->meta() = *sample.meta();
79 if (overwrite || gSystem->AccessPathName (location.c_str()) != 0)
80 {
81 std::vector<std::string> files = sample.makeFileList();
82 RCU::hadd (location, files);
83 }
84 return result;
85 }
std::vector< std::string > files
file names and file pointers
Definition hcg.cxx:50
void hadd(const std::string &output_file, const std::vector< std::string > &input_files, unsigned max_files)
effects: perform the hadd functionality guarantee: basic failures: out of memory III failures: i/o er...
Definition hadd.cxx:28

◆ mergeFiles() [2/2]

SampleHandler SH::mergeFiles ( const SampleHandler & sh,
const std::string & location,
bool overwrite )

merge all the files in the sample handler into a single file per sample in the given location

This is mostly meant to allow merging histogram files (particularly in EventLoop), but in some cases it can also be used to merge n-tuple files together.

Returns
a sample handler containing the merged files
Guarantee
basic
Failures
i/o errors
out of memory III

Definition at line 90 of file ToolsOther.cxx.

92 {
93 SampleHandler result;
94 for (auto& sample : sh)
95 {
96 std::string sublocation = location + sample->name() + ".root";
97 result.add (mergeFiles (*sample, sublocation, overwrite));
98 }
99 return result;
100 }
std::unique_ptr< SampleLocal > mergeFiles(const Sample &sample, const std::string &location, bool overwrite)
merge all the files in the sample into a single file in the given location

◆ mergeSamples()

void SH::mergeSamples ( SampleHandler & sh,
const std::string & sampleName,
const std::string & pattern )

effects: remove all samples matching the name pattern, and join them into a single sample named sampleName guarantee: strong failures: out of memory II failures: i/o errors

Definition at line 31 of file ToolsJoin.cxx.

33 {
34 SampleHandler mysh;
35 std::unique_ptr<SampleLocal> mysample (new SampleLocal (sampleName));
36
37 std::regex mypattern (pattern.c_str());
38 for (SampleHandler::iterator sample = sh.begin(),
39 end = sh.end(); sample != end; ++ sample)
40 {
41 if (RCU::match_expr (mypattern, (*sample)->name()))
42 {
43 for (unsigned file = 0, end = (*sample)->numFiles();
44 file != end; ++ file)
45 {
46 mysample->add ((*sample)->fileName (file));
47 }
48 } else
49 {
50 mysh.add (*sample);
51 }
52 }
53 mysh.add (mysample.release());
54 swap (mysh, sh);
55 }

◆ openFile()

std::unique_ptr< TFile > SH::openFile ( const std::string & name,
const MetaObject & options )

open a file with the given options

for now this is to allow opening files with retries, but in the future there may be other options as well.

Guarantee
strong
Failures
i/o errors
file not found
Postcondition
result != nullptr

Definition at line 35 of file ToolsOther.cxx.

36 {
37 const unsigned retries
38 = options.castInteger (MetaNames::openRetries(),
40 const double wait
41 = options.castDouble (MetaNames::openRetriesWait(),
43 if (wait < 0)
44 RCU_THROW_MSG ("negative values not allowed for: " + MetaNames::openRetriesWait());
45
46 std::unique_ptr<TFile> result;
47
48 static std::atomic<unsigned> seed = 0;
49 std::default_random_engine re (++seed);
50 std::uniform_real_distribution wait_dist (wait/2, wait);
51
52 for (unsigned tries = 0; tries <= retries; ++ tries)
53 {
54 if (tries > 0)
55 {
56 unsigned mywait = 1000 * wait_dist (re);
57 std::cout << "open failed, waiting " << (mywait/1000.) << " seconds: "
58 << name << std::endl;
59 std::this_thread::sleep_for (std::chrono::milliseconds (mywait));
60 }
61 result.reset (TFile::Open (name.c_str(), "READ"));
62 if (result != nullptr)
63 return result;
64 }
65 RCU_THROW_MSG ("failed to open file: " + name);
66 //cppcheck-suppress rethrowNoCurrentException
67 throw; //compiler dummy
68 }
const boost::regex re(r_e)
static const std::string & openRetries()
the number of retries for opening a file
Definition MetaNames.cxx:23
static double openRetriesWait_default()
the default value of openRetriesWait
Definition MetaNames.cxx:49
static unsigned openRetries_default()
the default value of openRetries
Definition MetaNames.cxx:32
static const std::string & openRetriesWait()
the amount of time (in seconds) to wait when doing retries
Definition MetaNames.cxx:40

◆ printDuplicateEvents()

void SH::printDuplicateEvents ( const Sample & sample)

effects: check the given sample for duplicate events and then print them out guarantee: basic, may print partially failures: out of memory III failures: i/o errors

Definition at line 142 of file ToolsDuplicates.cxx.

143 {
144 RunEventList list;
145 std::unique_ptr<TChain> chain (sample.makeTChain ());
146 printDuplicateEvents (*chain, list);
147 }
void printDuplicateEvents(const Sample &sample)
effects: check the given sample for duplicate events and then print them out guarantee: basic,...

◆ printDuplicateEventsJoint()

void SH::printDuplicateEventsJoint ( const SampleHandler & sh)

effects: check for duplicate events between all the samples and then print them out guarantee: basic, may print partially failures: out of memory III failures: i/o errors

Definition at line 162 of file ToolsDuplicates.cxx.

163 {
164 RunEventList list;
165 for (SampleHandler::iterator sample = sh.begin(),
166 end = sh.end(); sample != end; ++ sample)
167 {
168 std::unique_ptr<TChain> chain ((*sample)->makeTChain ());
169 printDuplicateEvents (*chain, list);
170 }
171 }

◆ printDuplicateEventsSplit()

void SH::printDuplicateEventsSplit ( const SampleHandler & sh)

effects: check each sample for duplicate events and then print them out guarantee: basic, may print partially failures: out of memory III failures: i/o errors

Definition at line 151 of file ToolsDuplicates.cxx.

152 {
153 for (SampleHandler::iterator sample = sh.begin(),
154 end = sh.end(); sample != end; ++ sample)
155 {
156 printDuplicateEvents (**sample);
157 }
158 }

◆ readFileList()

void SH::readFileList ( SampleHandler & sh,
const std::string & name,
const std::string & file )

effects: read a file list from a text file guarantee: strong failures: out of memory III failures: i/o errors

Definition at line 371 of file ToolsDiscovery.cxx.

373 {
374 std::ifstream myfile (file.c_str());
375
376 auto sample = std::make_unique<SampleLocal> (name);
377 std::string line;
378 const std::set<char> whitespaces{'\t',' ','\n','\r'};
379 while (std::getline (myfile, line))
380 {
381 while ((!line.empty()) && whitespaces.count(line.back())) line.pop_back();
382 if (!line.empty() && line.at(0) != '#')
383 {
384 sample->add (line);
385 }
386 }
387 if (!myfile.eof())
388 RCU_THROW_MSG ("failed to read file: " + file);
389 sh.add (sample.release());
390 }

◆ readSusyMeta()

void SH::readSusyMeta ( const SampleHandler & sh,
const std::string & inputFile )

effects: read the susy-meta-data file and add its information to the samples from the given sample handler guarantee: basic failures: i/o errors warning: there are no checks for duplicate and inconsistent sample definitions

Definition at line 30 of file ToolsMeta.cxx.

31 {
32 TString myfile = inputFile;
33 gSystem->ExpandPathName (myfile);
34 std::ifstream file (myfile.Data());
35 std::string line;
36
37 if (!file)
38 RCU_THROW_MSG ("failed to read file: " + inputFile);
39
40 while (std::getline (file, line))
41 {
42 if (line[0] >= '0' && line[0] <= '9')
43 {
44 std::istringstream str (line);
45 std::string DSID, name;
46 float crossSection = 0, kFactor = 0, filterEfficiency = 0,
47 xsUncertainty = 0;
48
49 if (!(str >> DSID >> name >> crossSection >> kFactor >> filterEfficiency
50 >> xsUncertainty))
51 RCU_THROW_MSG ("failed to parse line: " + line);
52
53 std::string mydsid = "." + DSID + ".";
54 for (SampleHandler::iterator sample = sh.begin(),
55 end = sh.end(); sample != end; ++ sample)
56 {
57 if ((*sample)->name().find (mydsid) != std::string::npos)
58 {
59 double oldCrossSection
60 = (*sample)->meta()->castDouble (MetaFields::crossSection);
61 (*sample)->meta()->setDouble (MetaFields::crossSection, oldCrossSection + crossSection);
62 (*sample)->meta()->setDouble (MetaFields::kfactor, kFactor);
63 (*sample)->meta()->setDouble (MetaFields::filterEfficiency, filterEfficiency);
64 (*sample)->meta()->setDouble (MetaFields::crossSectionRelUncertainty, xsUncertainty);
65 }
66 }
67 }
68 }
69 }
static const std::string crossSectionRelUncertainty
the relative uncertainty on the cross section
Definition MetaFields.h:61

◆ readSusyMetaDir()

void SH::readSusyMetaDir ( const SampleHandler & sh,
const std::string & inputDir )

effects: read an entire directory of susy-meta-data files and add their information to the samples from the given sample handler.

for that it assumes that all files ending in ".txt" are susy-meta-data files. guarantee: basic failures: i/o errors warning: there are no checks for duplicate and inconsistent sample definitions

Definition at line 71 of file ToolsMeta.cxx.

72 {
73 TString mydir = inputDir;
74 gSystem->ExpandPathName (mydir);
75 void *dirp = 0;
76
77 try
78 {
79 dirp = gSystem->OpenDirectory (mydir.Data());
80 const char *file = 0;
81 while ((file = gSystem->GetDirEntry (dirp)))
82 {
83 std::string myfile = inputDir + "/" + file;
84 if (myfile.size() > 4 && myfile.substr (myfile.size()-4) == ".txt")
85 readSusyMeta (sh, myfile);
86 }
87 gSystem->FreeDirectory (dirp);
88 } catch (...)
89 {
90 gSystem->FreeDirectory (dirp);
91 throw;
92 }
93 }
void readSusyMeta(const SampleHandler &sh, const std::string &inputFile)
effects: read the susy-meta-data file and add its information to the samples from the given sample ha...
Definition ToolsMeta.cxx:30

◆ rucioCacheDatasetGlob()

std::vector< std::string > SH::rucioCacheDatasetGlob ( const std::string & location,
const std::string & dataset,
const std::string & fileGlob )

download the dataset, and return a list matching the pattern

Guarantee
basic
Failures
grid utility failures
i/o errors

Definition at line 535 of file GridTools.cxx.

538 {
539 std::vector<std::string> result;
540
541 std::string path = location;
542 if (path.back() != '/')
543 path += "/";
544 if (dataset.find (':') != std::string::npos)
545 path += dataset.substr (dataset.find (':')+1);
546 else
547 path += dataset;
548 const std::string finished {
549 path + "-finished"};
550
551 // check if the finished file does not exist
552 // note that AccessPathName has the weirdest calling convention
553 if (gSystem->AccessPathName (finished.c_str()) != 0)
554 {
555 RucioDownloadResult status = rucioDownload (location, dataset);
556 if (status.downloadedFiles + status.alreadyLocal < status.totalFiles)
557 throw std::runtime_error ("failed to download all files of " + dataset);
558 // this just creates an empty file
559 std::ofstream (finished.c_str());
560 }
561
562 std::string output = sh::exec_read ("find " + sh::quote (path) + " -type f -name " + sh::quote (fileGlob));
563 std::istringstream str (output);
564 std::string line;
565 while (std::getline (str, line))
566 {
567 if (!line.empty())
568 result.push_back (line);
569 }
570 return result;
571 }
RucioDownloadResult rucioDownload(const std::string &location, const std::string &dataset)
run rucio-download
output
Definition merge.py:16

◆ rucioDirectAccessGlob()

std::vector< std::string > SH::rucioDirectAccessGlob ( const std::string & name,
const std::string & filter,
const std::string & selectOptions )

list the rucio URLs for all the files in the dataset or dataset container matching the given filter (as glob expression)

Guarantee
strong
Failures
grid utility failures
Precondition
!name.empty()
name.find('*') == std::string::npos
!filter.empty()

Definition at line 281 of file GridTools.cxx.

283 {
284 return rucioDirectAccessRegex (name, RCU::glob_to_regexp (filter),
285 selectOptions);
286 }
std::vector< std::string > rucioDirectAccessRegex(const std::string &name, const std::string &filter, const std::string &selectOptions)
list the rucio URLs for all the files in the dataset or dataset container matching the given filter (...

◆ rucioDirectAccessRegex()

std::vector< std::string > SH::rucioDirectAccessRegex ( const std::string & name,
const std::string & filter,
const std::string & selectOptions )

list the rucio URLs for all the files in the dataset or dataset container matching the given filter (as regular expression)

Guarantee
strong
Failures
grid utility failures
Precondition
!name.empty()
name.find('*') == std::string::npos
!filter.empty()

Definition at line 291 of file GridTools.cxx.

293 {
294 RCU_REQUIRE_SOFT (!name.empty());
295 RCU_REQUIRE_SOFT (name.find('*') == std::string::npos);
296 RCU_REQUIRE_SOFT (!filter.empty());
297
299
300 static const std::string separator = "------- SampleHandler Split -------";
301
302 ANA_MSG_INFO ("querying rucio for dataset " << name);
303 std::string output = sh::exec_read (rucioSetupCommand() + " && echo " + separator + " && rucio list-file-replicas --pfns --protocols root " + selectOptions + " " + sh::quote (name));
304 auto split = output.rfind (separator + "\n");
305 if (split == std::string::npos)
306 RCU_THROW_MSG ("couldn't find separator in: " + output);
307 std::istringstream str (output.substr (split + separator.size() + 1));
308
309 // this is used to avoid getting two copies of the same file. we
310 // first fill them in a map by filename, then copy them into a
311 // vector
312 std::map<std::string,std::string> resultMap;
313
314 std::regex urlPattern ("^root://.*");
315 std::regex pattern (filter);
316 std::string line;
317 while (std::getline (str, line))
318 {
319 if (line.empty())
320 {
321 // no-op
322 } else if (!RCU::match_expr (urlPattern, line))
323 {
324 ANA_MSG_INFO ("couldn't handle line: " << line);
325 } else
326 {
327 std::string::size_type split = line.rfind ("/");
328 if (split != std::string::npos)
329 {
330 std::string filename = line.substr (split+1);
331 if (RCU::match_expr (pattern, filename))
332 resultMap[filename] = line;
333 } else
334 RCU_THROW_MSG ("couldn't parse line: " + line);
335 }
336 }
337
338 std::vector<std::string> result;
339 for (const auto& file : resultMap)
340 result.push_back (file.second);
341 if (result.size() == 0)
342 ANA_MSG_WARNING ("dataset " + name + " did not contain any files. this is likely not right");
343 return result;
344 }
#define ANA_MSG_WARNING(xmsg)
Macro printing warning messages.

◆ rucioDownload()

RucioDownloadResult SH::rucioDownload ( const std::string & location,
const std::string & dataset )

run rucio-download

Guarantee
basic
Failures
grid utility failures

Definition at line 496 of file GridTools.cxx.

498 {
500
501 const std::string separator = "------- SampleHandler Split -------";
502 std::string command = rucioSetupCommand() + " && echo " + separator + " && cd " + sh::quote (location) + " && rucio download " + sh::quote (dataset) + " 2>&1";
503
504 ANA_MSG_INFO ("starting rucio download " + dataset + " into " + location);
505 std::string output = sh::exec_read (command);
506 auto split = output.rfind (separator + "\n");
507 if (split == std::string::npos)
508 RCU_THROW_MSG ("couldn't find separator in: " + output);
509 output = output.substr (split + separator.size() + 1);
510
512 result.did = readLine (output, "DID ");
513 result.totalFiles = readLineUnsigned (output, "Total files (DID): ");
514 result.downloadedFiles = readLineUnsigned (output, "Downloaded files: ");
515 result.alreadyLocal = readLineUnsigned (output, "Files already found locally: ");
516 result.notDownloaded = readLineUnsigned (output, "Files that cannot be downloaded: ");
517 return result;
518 }
the result from rucio_download
Definition GridTools.h:175

◆ rucioDownloadList()

std::vector< RucioDownloadResult > SH::rucioDownloadList ( const std::string & location,
const std::vector< std::string > & datasets )

run rucio-download with multiple datasets

Guarantee
basic
Failures
grid utility failures

Definition at line 523 of file GridTools.cxx.

525 {
526 std::vector<RucioDownloadResult> result;
527 for (auto& dataset : datasets)
528 result.push_back (rucioDownload (location, dataset));
529 return result;
530 }

◆ rucioGetMetadata()

std::map< std::string, std::unique_ptr< MetaObject > > SH::rucioGetMetadata ( const std::set< std::string > & datasets)

run rucio-get-metadata for the given list of datasets

Guarantee
strong
Failures
grid utility failures
Precondition
!datasets.empty()
!dataset.empty() (for each dataset)

Definition at line 426 of file GridTools.cxx.

427 {
428 RCU_REQUIRE_SOFT (!datasets.empty());
429
431
432 static const std::string separator = "------- SampleHandler Split -------";
433 std::map<std::string,std::unique_ptr<MetaObject> > result;
434
435 std::string command = rucioSetupCommand() + " && echo " + separator + " && rucio get-metadata";
436 for (auto& dataset : datasets)
437 {
438 RCU_REQUIRE_SOFT (!dataset.empty());
439 command += " " + sh::quote (dataset);
440 }
441
442 ANA_MSG_INFO ("querying rucio for meta-data");
443 std::string output = sh::exec_read (command);
444 auto split = output.rfind (separator + "\n");
445 if (split == std::string::npos)
446 RCU_THROW_MSG ("couldn't find separator in: " + output);
447
448 std::istringstream str (output.substr (split + separator.size() + 1));
449 std::regex pattern ("^([^:]+): *(.+)$");
450 std::string line;
451 std::unique_ptr<MetaObject> meta (new MetaObject);
452
453 auto addMeta = [&] ()
454 {
455 std::string name = meta->castString ("scope") + ":" + meta->castString ("name");
456 if (result.find (name) != result.end())
457 RCU_THROW_MSG ("read " + name + " twice");
458 result[name] = std::move (meta);
459 };
460
461 while (std::getline (str, line))
462 {
463 std::smatch what;
464 if (line == "------")
465 {
466 addMeta ();
467 meta.reset (new MetaObject);
468 } else if (std::regex_match (line, what, pattern))
469 {
470 if (meta->get (what[1]))
471 throw std::runtime_error (std::string("duplicate entry: ") + what[1].str());
472 meta->setString (what[1], what[2]);
473 } else if (!line.empty())
474 {
475 ANA_MSG_WARNING ("couldn't parse line: " << line);
476 }
477 }
478 addMeta ();
479
480 for (auto& subresult : result)
481 {
482 if (datasets.find (subresult.first) == datasets.end())
483 RCU_THROW_MSG ("received result for dataset not requested: " + subresult.first);
484 }
485 for (auto& dataset : datasets)
486 {
487 if (result.find (dataset) == result.end())
488 RCU_THROW_MSG ("received no result for dataset: " + dataset);
489 }
490
491 return result;
492 }
A class that manages meta-data to be associated with an object.
Definition MetaObject.h:56
-diff

◆ rucioListDids()

std::vector< RucioListDidsEntry > SH::rucioListDids ( const std::string & dataset)

run rucio-list-dids for the given dataset

Guarantee
strong
Failures
grid utility failures
Precondition
!dataset.empty()

Definition at line 348 of file GridTools.cxx.

349 {
350 RCU_REQUIRE_SOFT (!dataset.empty());
351
353
354 static const std::string separator = "------- SampleHandler Split -------";
355 std::vector<RucioListDidsEntry> result;
356
357 ANA_MSG_INFO ("querying rucio for dataset " << dataset);
358 std::string output = sh::exec_read (rucioSetupCommand() + " && echo " + separator + " && rucio list-dids " + sh::quote (dataset));
359 auto split = output.rfind (separator + "\n");
360 if (split == std::string::npos)
361 RCU_THROW_MSG ("couldn't find separator in: " + output);
362
363 std::istringstream str (output.substr (split + separator.size() + 1));
364 std::regex pattern ("^\\| ([a-zA-Z0-9_.-]+):([a-zA-Z0-9_.-]+) +\\| ([a-zA-Z0-9_.-]+) +\\| *$");
365 std::string line;
366 while (std::getline (str, line))
367 {
368 std::smatch what;
369 if (std::regex_match (line, what, pattern))
370 {
371 RucioListDidsEntry entry;
372 entry.scope = what[1];
373 entry.name = what[2];
374 entry.type = what[3];
375 result.push_back (entry);
376 }
377 }
378 return result;
379 }
one entry from the rucio-list-dids command
Definition GridTools.h:107

◆ rucioListFileReplicas()

std::vector< RucioListFileReplicasEntry > SH::rucioListFileReplicas ( const std::string & dataset)

run rucio-list-file-replicas for the given dataset

Guarantee
strong
Failures
grid utility failures
Precondition
!dataset.empty()

Definition at line 384 of file GridTools.cxx.

385 {
386 RCU_REQUIRE_SOFT (!dataset.empty());
387
389
390 static const std::string separator = "------- SampleHandler Split -------";
391 std::vector<RucioListFileReplicasEntry> result;
392
393 std::string command = rucioSetupCommand() + " && echo " + separator + " && rucio list-file-replicas --protocols root " + sh::quote (dataset);
394
395 ANA_MSG_INFO ("querying rucio for dataset " << dataset);
396 std::string output = sh::exec_read ( command );
397 auto split = output.rfind (separator + "\n");
398 if (split == std::string::npos)
399 RCU_THROW_MSG ("couldn't find separator in: " + output);
400
401 std::istringstream str (output.substr (split + separator.size() + 1));
402 std::regex pattern ("^\\| +([^ ]+) +\\| +([^ ]+) +\\| +([^ ]+ [^ ]+) +\\| +([^ ]+) +\\| +([^: ]+): ([^ ]+) +\\| *$");
403 std::string line;
404 while (std::getline (str, line))
405 {
406 std::smatch what;
407 if (std::regex_match (line, what, pattern) &&
408 what[1] != "SCOPE")
409 {
411 entry.scope = what[1];
412 entry.name = what[2];
413 entry.filesize = what[3];
414 entry.adler32 = what[4];
415 entry.disk = what[5];
416 entry.replica = what[6];
417 result.push_back (entry);
418 }
419 }
420 return result;
421 }
one entry from the rucio-list-file-replicas command
Definition GridTools.h:131

◆ scanDir() [1/3]

void SH::scanDir ( SampleHandler & sh,
const std::string & dir )

Definition at line 62 of file ToolsDiscovery.cxx.

63 {
64 ScanDir()
65 .sampleDepth (0)
66 .minDepth (1)
67 .maxDepth (1)
68 .scan (sh, dir);
69 }
the class used for scanning local directories and file servers for samples
Definition ScanDir.h:38
const ScanDir & scan(SampleHandler &sh, const std::string &dir) const
scan the given directory and put the created samples into the sample handler
Definition ScanDir.cxx:168
ScanDir & sampleDepth(int val_sampleDepth)
the index of the file hierarchy at which we gather the sample name.
Definition ScanDir.cxx:47
ScanDir & maxDepth(std::size_t val_maxDepth)
the maximum depth for files to make it into the sample
Definition ScanDir.cxx:85
ScanDir & minDepth(std::size_t val_minDepth)
the minimum depth for files to make it into the sample
Definition ScanDir.cxx:76

◆ scanDir() [2/3]

void SH::scanDir ( SampleHandler & sh,
const std::string & dir,
const std::string & prefix )

Definition at line 73 of file ToolsDiscovery.cxx.

75 {
76 DiskListLocal list (dir, prefix);
77 ScanDir()
78 .sampleDepth (0)
79 .minDepth (1)
80 .maxDepth (1)
81 .scan (sh, list);
82 }
a DiskList implementation for local directories

◆ scanDir() [3/3]

void SH::scanDir ( SampleHandler & sh,
DiskList & list,
const std::string & pattern = "*.root*",
const std::string & samplePattern = "*",
const std::string & samplePostfix = "" )

effects: scan the given directory and add all subdirectories as samples that contain root files.

if prefix is provided, use that instead of dir when constructing the file names. guarantee: basic, only some samples might be added failures: low level errors IV failures: directory not found failures: duplicate samples rationale: the prefix option is for the Tier 3 prototype, where you scan the local directory, but then access the files through xrootd

Definition at line 45 of file ToolsDiscovery.cxx.

49 {
50 ScanDir()
51 .sampleDepth (0)
52 .minDepth (1)
53 .maxDepth (1)
54 .filePattern (pattern)
55 .samplePattern (samplePattern)
56 .samplePostfix (samplePostfix)
57 .scan (sh, list);
58 }
ScanDir & samplePattern(const std::string &val_samplePattern)
the pattern for samples to be accepted
Definition ScanDir.cxx:130
ScanDir & samplePostfix(const std::string &val_samplePostfix)
the pattern for the postfix to be stripped from the sampleName
Definition ScanDir.cxx:139
ScanDir & filePattern(const std::string &val_filePattern)
the pattern for files to be accepted
Definition ScanDir.cxx:94

◆ scanDQ2()

void SH::scanDQ2 ( SampleHandler & sh,
const std::string & pattern )

effects: make a list from DQ2 using the given pattern guarantee: basic, may add partially failures: out of memory III failures: dq2-ls errors

Definition at line 124 of file ToolsDiscovery.cxx.

125 {
126 if (pattern.find ("*") == std::string::npos)
127 {
128 addGrid (sh, pattern);
129 } else
130 {
131 std::set<std::string> types = {"DATASET", "DIDType.DATASET"};
132 if (pattern.back() == '/')
133 types = {"CONTAINER", "DIDType.CONTAINER"};
134
135 auto subresult = rucioListDids (pattern);
136 for (auto& ds : subresult)
137 {
138 if (types.find (ds.type) != types.end())
139 addGrid (sh, ds.scope + ":" + ds.name);
140 }
141 }
142 }
static const std::vector< std::string > types
void addGrid(SampleHandler &sh, const std::string &ds)
effects: add a grid dataset for dataset ds guarantee: strong failures: out of memory II requires: ds....
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset

◆ scanFiles()

void SH::scanFiles ( SampleHandler & sh,
DiskList & list,
const std::string & pattern = "*.root*" )

effects: scan the given directory tree and make a separate sample for each file (using the file name before any "." as the sample name).

guarantee: basic, only some samples might be added failures: out of memory III failures: i/o errors failures: duplicate samples

Definition at line 86 of file ToolsDiscovery.cxx.

88 {
89 ScanDir()
90 .sampleDepth (-1)
91 .filePattern (pattern)
92 .samplePostfix (".*")
93 .scan (sh, list);
94 }

◆ scanForTrees() [1/2]

void SH::scanForTrees ( SampleHandler & sh,
const std::string & pattern )

Definition at line 357 of file ToolsDiscovery.cxx.

358 {
359 SH::SampleHandler sh_new;
360
361 for (SampleHandler::iterator sample = sh.begin(),
362 end = sh.end(); sample != end; ++ sample)
363 {
364 scanForTrees (sh_new, **sample, pattern);
365 }
366 swap (sh, sh_new);
367 }
void scanForTrees(SampleHandler &sh, Sample &sample, const std::string &pattern)
effects: scan for trees in the given sample (or sample handler), and create a separate sample for eac...

◆ scanForTrees() [2/2]

void SH::scanForTrees ( SampleHandler & sh,
Sample & sample,
const std::string & pattern = ".*" )

effects: scan for trees in the given sample (or sample handler), and create a separate sample for each tree.

if pattern is specified it is applied to the allowed tree names. guarantee: strong failures: out of memory III failures: i/o errors

Definition at line 326 of file ToolsDiscovery.cxx.

328 {
329 SamplePtr mysample = sample.makeLocal();
330 if (mysample->numFiles() == 0)
331 {
332 sh.add (&sample);
333 return;
334 }
335 std::unique_ptr<TFile> file (TFile::Open (mysample->fileName(0).c_str()));
336 if (!file.get())
337 RCU_THROW_MSG ("could not open file: " + mysample->fileName(0));
338 TObject *object = 0;
339 std::regex mypattern (pattern);
340 for (TIter iter (file->GetListOfKeys()); (object = iter.Next()); )
341 {
342 if (RCU::match_expr (mypattern, object->GetName()) &&
343 dynamic_cast<TTree*>(file->Get(object->GetName())))
344 {
345 std::string newName = sample.name() + "_" + object->GetName();
346 std::unique_ptr<Sample> newSample
347 (dynamic_cast<Sample*>(sample.Clone (newName.c_str())));
348 newSample->name (newName);
349 newSample->meta()->setString (MetaFields::treeName, object->GetName());
350 sh.add (newSample.release());
351 }
352 }
353 }
A smart pointer class that holds a single Sample object.
Definition SamplePtr.h:35
std::size_t numFiles() const
the number of files in the sample
std::string fileName(std::size_t index) const
the name of the file with the given index

◆ scanNEvents() [1/2]

void SH::scanNEvents ( Sample & sample)

effects: scan the given sample and store the number of entries per file in the meta-data guarantee: strong failures: out of memory failures: read errors failures: invalid sample type

Definition at line 47 of file ToolsSplit.cxx.

48 {
49 SampleLocal *const mysample = dynamic_cast<SampleLocal*>(&sample);
50 if (!mysample)
51 RCU_THROW_MSG ("sample not of type SampleLocal");
52
53 const std::string tree_name = sample.meta()->castString (MetaFields::treeName, MetaFields::treeName_default);
54 if (tree_name.empty())
55 RCU_THROW_MSG ("sample doesn't contain a tree name");
56
57 Long64_t tot_entries = 0;
58 std::vector<Long64_t> entries;
59 const auto fileList = sample.makeFileList ();
60 for (const std::string& file_name : fileList)
61 {
62 std::unique_ptr<TFile> file (TFile::Open (file_name.c_str(), "READ"));
63 if (!file.get())
64 RCU_THROW_MSG ("failed to open file " + file_name);
65 Long64_t treeEntries = 0;
66 TTree *const tree = dynamic_cast<TTree*>(file->Get (tree_name.c_str()));
67 if (tree != 0)
68 treeEntries = tree->GetEntries ();
69 entries.push_back (treeEntries);
70 tot_entries += treeEntries;
71 }
72 RCU_ASSERT (entries.size() == fileList.size());
73 sample.meta()->addReplace (new MetaVector<Long64_t>(MetaFields::numEventsPerFile, entries));
74 sample.meta()->setDouble (MetaFields::numEvents, tot_entries);
75 }
double entries
Definition listroot.cxx:49
static const std::string treeName_default
the default value of treeName
Definition MetaFields.h:55
static const std::string numEventsPerFile
the number of events in each file
Definition MetaFields.h:67
TChain * tree

◆ scanNEvents() [2/2]

void SH::scanNEvents ( SampleHandler & sh)

effects: scan each sample in the sample handler and store the number of entries per file in the meta-data guarantee: basic, may only scan some failures: out of memory failures: read errors failures: invalid sample type

Definition at line 38 of file ToolsSplit.cxx.

39 {
40 for (SampleHandler::iterator sample = sh.begin(),
41 end = sh.end(); sample != end; ++ sample)
42 scanNEvents (**sample);
43 }
void scanNEvents(SampleHandler &sh)
effects: scan each sample in the sample handler and store the number of entries per file in the meta-...

◆ scanRucio()

void SH::scanRucio ( SampleHandler & sh,
const std::string & pattern,
bool alwaysQuery = false )

make a list of grid datasets using the given pattern

This will first look for dataset containers, and if none are found, it will instead look for datasets. If those are also not found it will produce an error.

For patterns that don't contain a wild-card (i.e. that can only match one dataset/container) it instead adds the dataset directly without querying rucio. This is a performance optimization based on the assumption that you probably checked the dataset names beforehand, and that if you made a mistake it will pop up at a later stage anyways. If you want to query rucio even in those cases set alwaysQuery to true.

Guarantee
basic, may add some datasets
Failures
no pattern match
rucio failures
out of memory III

Definition at line 146 of file ToolsDiscovery.cxx.

148 {
149 if (pattern.find ("*") == std::string::npos && !alwaysQuery)
150 {
151 addGrid (sh, pattern);
152 } else
153 {
154 auto subresult = rucioListDids (pattern);
155 bool added = false;
156 for (std::string type : {"CONTAINER", "DIDType.CONTAINER", "DATASET", "DIDType.DATASET"})
157 {
158 for (auto& ds : subresult)
159 {
160 if (ds.type == type)
161 {
162 addGrid (sh, ds.scope + ":" + ds.name);
163 added = true;
164 }
165 }
166 if (added)
167 return;
168 }
169 RCU_THROW_MSG ("failed to find any datasets matching pattern: " + pattern);
170 }
171 }

◆ scanSingleDir()

void SH::scanSingleDir ( SampleHandler & sh,
const std::string & name,
DiskList & list,
const std::string & pattern )

Effects: scan the given directory tree and turn it into a single sample of the given name. Guarantee: strong. Failures: out of memory III.

Definition at line 112 of file ToolsDiscovery.cxx.

114 {
115 ScanDir()
116 .sampleDepth (0)
117 .filePattern (pattern)
118 .sampleRename ("*", name)
119 .scan (sh, list);
120 }
ScanDir & sampleRename(const std::string &pattern, const std::string &name)
rename any sample matching pattern to name
Definition ScanDir.cxx:149

◆ splitSample()

SampleHandler SH::splitSample ( Sample & sample,
Long64_t nevt )

Effects: split the given sample into a set of samples, with each sample containing either exactly one file or at most nevt events. Side effects: if scanNEvents hasn't been run on this sample, run it.

Guarantee: strong. Failures: out of memory; scanning errors.

Definition at line 79 of file ToolsSplit.cxx.

80 {
81 if (!dynamic_cast<SampleLocal*>(&sample))
82 RCU_THROW_MSG ("sample not of type SampleLocal");
83
84 TObject *meta = sample.meta()->get (MetaFields::numEventsPerFile);
85 if (!meta)
86 {
87 RCU_WARN_MSG ("sample " + sample.name() + " lacks nc_nevtfile, running scanNEvents, please save sample");
88 scanNEvents (sample);
89 meta = sample.meta()->get (MetaFields::numEventsPerFile);
90 }
91 RCU_ASSERT (meta != 0);
92 MetaVector<Long64_t> *const nentries
93 = dynamic_cast<MetaVector<Long64_t> *>(meta);
94 if (nentries == 0)
95 RCU_THROW_MSG ("nc_nevtfile is of the wrong type");
96 if (nentries->value.size() != sample.numFiles())
97 RCU_THROW_MSG ("nc_nevtfile has the wrong number of entries");
98
99 SampleHandler result;
100 std::unique_ptr<SampleLocal> res;
101 Long64_t num = 0;
102 const std::string meta_tree = sample.meta()->castString (MetaFields::treeName, MetaFields::treeName_default);
103 const double meta_xs = sample.meta()->castDouble (MetaFields::crossSection, 0);
104 for (unsigned file = 0, end = nentries->value.size(); file != end; ++ file)
105 {
106 if (num > 0 && num + nentries->value[file] > nevt)
107 {
108 result.add (res.release());
109 num = 0;
110 }
111 if (res.get() == 0)
112 {
113 std::ostringstream name;
114 name << sample.name() << "_" << result.size();
115 res.reset (new SampleLocal (name.str()));
116 res->tags (sample.tags());
117 res->meta()->fetch (*sample.meta());
118 if (!meta_tree.empty())
119 res->meta()->setString (MetaFields::treeName, meta_tree);
120 if (meta_xs != 0)
121 res->meta()->setDouble (MetaFields::crossSection, meta_xs);
122 }
123 res->add (sample.fileName (file));
124 num += nentries->value[file];
125 }
126 if (num > 0)
127 result.add (res.release());
128 return result;
129 }
std::pair< std::vector< unsigned int >, bool > res