ATLAS Offline Software
Loading...
Searching...
No Matches
GridTools.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7
8//
9// includes
10//
11
13
21#include <TSystem.h>
22#include <chrono>
23#include <fstream>
24#include <mutex>
25
26namespace sh = RCU::Shell;
27
28//
29// method implementations
30//
31
32namespace SH
33{
34 ANA_MSG_SOURCE (msgGridTools, "SampleHandler_GridTools")
35 using namespace msgGridTools;
36
37 namespace
38 {
39 struct ProxyData
40 {
41 // the clock we use
42 typedef std::chrono::steady_clock clock;
43
44 // don't really need a mutex as the code unlikely to be
45 // multi-threaded, but may just as well put one to protect the
46 // global/static variable
47 std::recursive_mutex mutex;
48
49 // whether we have confirmed that we do have a proxy
50 bool haveProxy = false;
51
52 // the expiration time of the proxy (if we have one)
53 decltype(clock::now()) proxyExpiration;
54
55 bool checkVomsProxy ()
56 {
57 std::lock_guard<std::recursive_mutex> lock (mutex);
58
59 if (haveProxy == false)
60 {
61 ANA_MSG_INFO ("checking for valid grid proxy");
62 int rc = 0;
63 std::string output =
64 RCU::Shell::exec_read ("voms-proxy-info --actimeleft", rc);
65 if (rc != 0)
66 {
67 ANA_MSG_INFO ("no valid proxy found");
68 } else
69 {
70 std::istringstream str (output);
71 unsigned seconds = 0;
72
73 if (!(str >> seconds))
74 {
75 // Output format is more complicated if RPM isn't installed
76 std::istringstream str2 (output.substr(output.rfind('\n',output.size()-2)+1,std::string::npos));
77
78 if (!(str2 >> seconds)){
79 ANA_MSG_INFO ("failed to parse command output: " << output);
80 } else
81 {
82 proxyExpiration = clock::now() + std::chrono::seconds (seconds);
83 haveProxy = true;
84 } // Second try was successful
85
86 } else
87 {
88 proxyExpiration = clock::now() + std::chrono::seconds (seconds);
89 haveProxy = true;
90 } // First try was successful
91 }
92 }
93
94 return haveProxy &&
95 proxyExpiration > clock::now() + std::chrono::minutes (20);
96 }
97
98 void ensureVomsProxy ()
99 {
100 std::lock_guard<std::recursive_mutex> lock (mutex);
101
102 if (checkVomsProxy())
103 return;
104
105 if (haveProxy)
106 {
107 ANA_MSG_INFO ("proxy expired or about to expire");
108 } else
109 {
110 ANA_MSG_INFO ("no proxy found");
111 }
112 ANA_MSG_INFO ("trying to set up a new proxy");
113 haveProxy = false;
114 RCU::Shell::exec ("voms-proxy-init -voms atlas");
116 }
117 };
118
119 ProxyData& proxyData ()
120 {
121 // Methods of ProxyData() are thread-safe.
122 static ProxyData result ATLAS_THREAD_SAFE;
123 return result;
124 }
125
126
127
/// Collect the remainder of every line of `text` that starts with the
/// prefix `begin`.
///
/// \param text   the (multi-line) text to scan
/// \param begin  the prefix a line has to start with
/// \return for each matching line the text following the prefix, with
///   leading and trailing whitespace removed (possibly an empty
///   string), in order of appearance
std::vector<std::string>
readLineList (const std::string& text,
              const std::string& begin)
{
  std::vector<std::string> result;

  for (std::string::size_type split = 0;
       (split = text.find (begin, split)) != std::string::npos;
       ++ split)
  {
    // only accept occurrences of the prefix at the start of a line
    if (split == 0 || text[split-1] == '\n')
    {
      split += begin.size();
      auto lineEnd = text.find ('\n', split);
      if (lineEnd == std::string::npos)
        lineEnd = text.size();

      // Trim by moving indices instead of calling front()/back() on a
      // copy: that was undefined behavior for a remainder that is
      // empty or consists only of whitespace.  The cast keeps
      // isspace() well-defined for negative char values.
      auto first = split;
      auto last = lineEnd;
      while (first < last &&
             isspace (static_cast<unsigned char> (text[first])))
        ++ first;
      while (last > first &&
             isspace (static_cast<unsigned char> (text[last-1])))
        -- last;
      result.push_back (text.substr (first, last - first));
    }
  }
  return result;
}
156
157
158
161 std::string readLine (const std::string& text,
162 const std::string& begin)
163 {
164 auto lines = readLineList (text, begin);
165 if (lines.empty())
166 RCU_THROW_MSG ("failed to find line starting with: " + begin);
167 if (lines.size() > 1)
168 RCU_THROW_MSG ("multiple lines starting with: " + begin);
169 return lines.at(0);
170 }
171
172
173
176 unsigned readLineUnsigned (const std::string& text,
177 const std::string& begin)
178 {
179 const auto line = readLine (text, begin);
180 std::istringstream str (line);
181 unsigned result = 0;
182 if (!(str >> result) || !str.eof())
183 RCU_THROW_MSG ("failed to convert " + line + " into an unsigned");
184 return result;
185 }
186
187
188
/// The shell snippet that is prepended to every rucio command in this
/// file in order to set up the rucio client.
std::string rucioSetupCommand ()
{
  static const char command [] =
    "source $ATLAS_LOCAL_ROOT_BASE/user/atlasLocalSetup.sh -q && lsetup --force 'rucio -w'";
  return command;
}
194 }
195
196
197
/// The name of the environment variable containing the directory for
/// staging files from the grid.
///
/// The returned reference refers to a function-local static and is
/// the same object on every call.
const std::string& downloadStageEnvVar ()
{
  static const std::string envVarName {"SAMPLEHANDLER_RUCIO_DOWNLOAD"};
  return envVarName;
}
203
204
205
207 {
208 return proxyData().checkVomsProxy();
209 }
210
211
212
214 {
215 proxyData().ensureVomsProxy();
216 }
217
218
219
220 std::vector<std::string>
221 faxListFilesGlob (const std::string& name, const std::string& filter)
222 {
223#pragma GCC diagnostic push
224#pragma GCC diagnostic ignored "-Wpragmas"
225#pragma GCC diagnostic ignored "-Wunknown-pragmas"
226#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
227 return faxListFilesRegex (name, RCU::glob_to_regexp (filter));
228#pragma GCC diagnostic pop
229 }
230
231
232
233 std::vector<std::string>
234 faxListFilesRegex (const std::string& name, const std::string& filter)
235 {
236 RCU_REQUIRE_SOFT (!name.empty());
237 RCU_REQUIRE_SOFT (name.find('*') == std::string::npos);
238 RCU_REQUIRE_SOFT (!filter.empty());
239
241
242 static const std::string separator = "------- SampleHandler Split -------";
243 std::vector<std::string> result;
244
245 ANA_MSG_INFO ("querying FAX for dataset " << name);
246 std::string output = sh::exec_read ("source $ATLAS_LOCAL_ROOT_BASE/user/atlasLocalSetup.sh -q && lsetup --force fax && echo " + separator + " && fax-get-gLFNs " + sh::quote (name));
247 auto split = output.rfind (separator + "\n");
248 if (split == std::string::npos)
249 RCU_THROW_MSG ("couldn't find separator in: " + output);
250
251 std::istringstream str (output.substr (split + separator.size() + 1));
252 std::regex pattern (filter);
253 std::string line;
254 while (std::getline (str, line))
255 {
256 if (!line.empty())
257 {
258 if (!line.starts_with ("root:"))
259 RCU_THROW_MSG ("couldn't parse line: " + line);
260
261 std::string::size_type split1 = line.rfind (":");
262 std::string::size_type split2 = line.rfind ("/");
263 if (split1 < split2)
264 split1 = split2;
265 if (split1 != std::string::npos)
266 {
267 if (RCU::match_expr (pattern, line.substr (split1+1)))
268 result.push_back (line);
269 } else
270 RCU_THROW_MSG ("couldn't parse line: " + line);
271 }
272 }
273 if (result.size() == 0)
274 RCU_WARN_MSG ("dataset " + name + " did not contain any files. this is likely not right");
275 return result;
276 }
277
278
279
280 std::vector<std::string>
281 rucioDirectAccessGlob (const std::string& name, const std::string& filter,
282 const std::string& selectOptions)
283 {
284 return rucioDirectAccessRegex (name, RCU::glob_to_regexp (filter),
285 selectOptions);
286 }
287
288
289
290 std::vector<std::string>
291 rucioDirectAccessRegex (const std::string& name, const std::string& filter,
292 const std::string& selectOptions)
293 {
294 RCU_REQUIRE_SOFT (!name.empty());
295 RCU_REQUIRE_SOFT (name.find('*') == std::string::npos);
296 RCU_REQUIRE_SOFT (!filter.empty());
297
299
300 static const std::string separator = "------- SampleHandler Split -------";
301
302 ANA_MSG_INFO ("querying rucio for dataset " << name);
303 std::string output = sh::exec_read (rucioSetupCommand() + " && echo " + separator + " && rucio list-file-replicas --pfns --protocols root " + selectOptions + " " + sh::quote (name));
304 auto split = output.rfind (separator + "\n");
305 if (split == std::string::npos)
306 RCU_THROW_MSG ("couldn't find separator in: " + output);
307 std::istringstream str (output.substr (split + separator.size() + 1));
308
309 // this is used to avoid getting two copies of the same file. we
310 // first fill them in a map by filename, then copy them into a
311 // vector
312 std::map<std::string,std::string> resultMap;
313
314 std::regex urlPattern ("^root://.*");
315 std::regex pattern (filter);
316 std::string line;
317 while (std::getline (str, line))
318 {
319 if (line.empty())
320 {
321 // no-op
322 } else if (!RCU::match_expr (urlPattern, line))
323 {
324 ANA_MSG_INFO ("couldn't handle line: " << line);
325 } else
326 {
327 std::string::size_type split = line.rfind ("/");
328 if (split != std::string::npos)
329 {
330 std::string filename = line.substr (split+1);
331 if (RCU::match_expr (pattern, filename))
332 resultMap[filename] = line;
333 } else
334 RCU_THROW_MSG ("couldn't parse line: " + line);
335 }
336 }
337
338 std::vector<std::string> result;
339 for (const auto& file : resultMap)
340 result.push_back (file.second);
341 if (result.size() == 0)
342 ANA_MSG_WARNING ("dataset " + name + " did not contain any files. this is likely not right");
343 return result;
344 }
345
346
347
348 std::vector<RucioListDidsEntry> rucioListDids (const std::string& dataset)
349 {
350 RCU_REQUIRE_SOFT (!dataset.empty());
351
353
354 static const std::string separator = "------- SampleHandler Split -------";
355 std::vector<RucioListDidsEntry> result;
356
357 ANA_MSG_INFO ("querying rucio for dataset " << dataset);
358 std::string output = sh::exec_read (rucioSetupCommand() + " && echo " + separator + " && rucio list-dids " + sh::quote (dataset));
359 auto split = output.rfind (separator + "\n");
360 if (split == std::string::npos)
361 RCU_THROW_MSG ("couldn't find separator in: " + output);
362
363 std::istringstream str (output.substr (split + separator.size() + 1));
364 std::regex pattern ("^\\| ([a-zA-Z0-9_.-]+):([a-zA-Z0-9_.-]+) +\\| ([a-zA-Z0-9_.-]+) +\\| *$");
365 std::string line;
366 while (std::getline (str, line))
367 {
368 std::smatch what;
369 if (std::regex_match (line, what, pattern))
370 {
371 RucioListDidsEntry entry;
372 entry.scope = what[1];
373 entry.name = what[2];
374 entry.type = what[3];
375 result.push_back (entry);
376 }
377 }
378 return result;
379 }
380
381
382
383 std::vector<RucioListFileReplicasEntry>
384 rucioListFileReplicas (const std::string& dataset)
385 {
386 RCU_REQUIRE_SOFT (!dataset.empty());
387
389
390 static const std::string separator = "------- SampleHandler Split -------";
391 std::vector<RucioListFileReplicasEntry> result;
392
393 std::string command = rucioSetupCommand() + " && echo " + separator + " && rucio list-file-replicas --protocols root " + sh::quote (dataset);
394
395 ANA_MSG_INFO ("querying rucio for dataset " << dataset);
396 std::string output = sh::exec_read ( command );
397 auto split = output.rfind (separator + "\n");
398 if (split == std::string::npos)
399 RCU_THROW_MSG ("couldn't find separator in: " + output);
400
401 std::istringstream str (output.substr (split + separator.size() + 1));
402 std::regex pattern ("^\\| +([^ ]+) +\\| +([^ ]+) +\\| +([^ ]+ [^ ]+) +\\| +([^ ]+) +\\| +([^: ]+): ([^ ]+) +\\| *$");
403 std::string line;
404 while (std::getline (str, line))
405 {
406 std::smatch what;
407 if (std::regex_match (line, what, pattern) &&
408 what[1] != "SCOPE")
409 {
411 entry.scope = what[1];
412 entry.name = what[2];
413 entry.filesize = what[3];
414 entry.adler32 = what[4];
415 entry.disk = what[5];
416 entry.replica = what[6];
417 result.push_back (entry);
418 }
419 }
420 return result;
421 }
422
423
424
425 std::map<std::string,std::unique_ptr<MetaObject> >
426 rucioGetMetadata (const std::set<std::string>& datasets)
427 {
428 RCU_REQUIRE_SOFT (!datasets.empty());
429
431
432 static const std::string separator = "------- SampleHandler Split -------";
433 std::map<std::string,std::unique_ptr<MetaObject> > result;
434
435 std::string command = rucioSetupCommand() + " && echo " + separator + " && rucio get-metadata";
436 for (auto& dataset : datasets)
437 {
438 RCU_REQUIRE_SOFT (!dataset.empty());
439 command += " " + sh::quote (dataset);
440 }
441
442 ANA_MSG_INFO ("querying rucio for meta-data");
443 std::string output = sh::exec_read (command);
444 auto split = output.rfind (separator + "\n");
445 if (split == std::string::npos)
446 RCU_THROW_MSG ("couldn't find separator in: " + output);
447
448 std::istringstream str (output.substr (split + separator.size() + 1));
449 std::regex pattern ("^([^:]+): *(.+)$");
450 std::string line;
451 std::unique_ptr<MetaObject> meta (new MetaObject);
452
453 auto addMeta = [&] ()
454 {
455 std::string name = meta->castString ("scope") + ":" + meta->castString ("name");
456 if (result.find (name) != result.end())
457 RCU_THROW_MSG ("read " + name + " twice");
458 result[name] = std::move (meta);
459 };
460
461 while (std::getline (str, line))
462 {
463 std::smatch what;
464 if (line == "------")
465 {
466 addMeta ();
467 meta.reset (new MetaObject);
468 } else if (std::regex_match (line, what, pattern))
469 {
470 if (meta->get (what[1]))
471 throw std::runtime_error (std::string("duplicate entry: ") + what[1].str());
472 meta->setString (what[1], what[2]);
473 } else if (!line.empty())
474 {
475 ANA_MSG_WARNING ("couldn't parse line: " << line);
476 }
477 }
478 addMeta ();
479
480 for (auto& subresult : result)
481 {
482 if (datasets.find (subresult.first) == datasets.end())
483 RCU_THROW_MSG ("received result for dataset not requested: " + subresult.first);
484 }
485 for (auto& dataset : datasets)
486 {
487 if (result.find (dataset) == result.end())
488 RCU_THROW_MSG ("received no result for dataset: " + dataset);
489 }
490
491 return result;
492 }
493
494
495
496 RucioDownloadResult rucioDownload (const std::string& location,
497 const std::string& dataset)
498 {
500
501 const std::string separator = "------- SampleHandler Split -------";
502 std::string command = rucioSetupCommand() + " && echo " + separator + " && cd " + sh::quote (location) + " && rucio download " + sh::quote (dataset) + " 2>&1";
503
504 ANA_MSG_INFO ("starting rucio download " + dataset + " into " + location);
505 std::string output = sh::exec_read (command);
506 auto split = output.rfind (separator + "\n");
507 if (split == std::string::npos)
508 RCU_THROW_MSG ("couldn't find separator in: " + output);
509 output = output.substr (split + separator.size() + 1);
510
512 result.did = readLine (output, "DID ");
513 result.totalFiles = readLineUnsigned (output, "Total files (DID): ");
514 result.downloadedFiles = readLineUnsigned (output, "Downloaded files: ");
515 result.alreadyLocal = readLineUnsigned (output, "Files already found locally: ");
516 result.notDownloaded = readLineUnsigned (output, "Files that cannot be downloaded: ");
517 return result;
518 }
519
520
521
522 std::vector<RucioDownloadResult>
523 rucioDownloadList (const std::string& location,
524 const std::vector<std::string>& datasets)
525 {
526 std::vector<RucioDownloadResult> result;
527 for (auto& dataset : datasets)
528 result.push_back (rucioDownload (location, dataset));
529 return result;
530 }
531
532
533
534 std::vector<std::string>
535 rucioCacheDatasetGlob (const std::string& location,
536 const std::string& dataset,
537 const std::string& fileGlob)
538 {
539 std::vector<std::string> result;
540
541 std::string path = location;
542 if (path.back() != '/')
543 path += "/";
544 if (dataset.find (':') != std::string::npos)
545 path += dataset.substr (dataset.find (':')+1);
546 else
547 path += dataset;
548 const std::string finished {
549 path + "-finished"};
550
551 // check if the finished file does not exist
552 // note that AccessPathName has the weirdest calling convention
553 if (gSystem->AccessPathName (finished.c_str()) != 0)
554 {
555 RucioDownloadResult status = rucioDownload (location, dataset);
556 if (status.downloadedFiles + status.alreadyLocal < status.totalFiles)
557 throw std::runtime_error ("failed to download all files of " + dataset);
558 // this just creates an empty file
559 std::ofstream (finished.c_str());
560 }
561
562 std::string output = sh::exec_read ("find " + sh::quote (path) + " -type f -name " + sh::quote (fileGlob));
563 std::istringstream str (output);
564 std::string line;
565 while (std::getline (str, line))
566 {
567 if (!line.empty())
568 result.push_back (line);
569 }
570 return result;
571 }
572}
#define RCU_REQUIRE_SOFT(x)
Definition Assert.h:153
macros for messaging and checking status codes
#define ANA_MSG_INFO(xmsg)
Macro printing info messages.
#define ANA_MSG_WARNING(xmsg)
Macro printing warning messages.
#define ANA_MSG_SOURCE(NAME, TITLE)
the source code part of ANA_MSG_SOURCE
static Double_t rc
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:58
#define RCU_WARN_MSG(message)
Definition PrintMsg.h:52
Define macros for attributes used to control the static checker.
#define ATLAS_THREAD_SAFE
A class that manages meta-data to be associated with an object.
Definition MetaObject.h:56
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
std::string exec_read(const std::string &cmd)
effects: execute the given command and return the output returns: the output of the command guarantee...
Definition ShellExec.cxx:37
void exec(const std::string &cmd)
effects: execute the given command guarantee: strong failures: out of memory II failures: system fail...
Definition ShellExec.cxx:29
std::string quote(const std::string &name)
effects: quote the given name to protect it from the shell returns: the quoted name guarantee: strong...
Definition ShellExec.cxx:75
bool match_expr(const std::regex &expr, const std::string &str)
returns: whether we can match the entire string with the regular expression guarantee: strong failure...
std::string glob_to_regexp(const std::string &glob)
returns: a string that is the regular expression equivalent of the given glob expression guarantee: s...
This module provides a lot of global definitions, forward declarations and includes that are used by ...
Definition PrunDriver.h:15
std::vector< RucioDownloadResult > rucioDownloadList(const std::string &location, const std::vector< std::string > &datasets)
run rucio-download with multiple datasets
RucioDownloadResult rucioDownload(const std::string &location, const std::string &dataset)
run rucio-download
const std::string & downloadStageEnvVar()
the name of the environment variable containing the directory for staging files from the grid
std::vector< std::string > faxListFilesGlob(const std::string &name, const std::string &filter)
list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as...
std::vector< RucioListFileReplicasEntry > rucioListFileReplicas(const std::string &dataset)
run rucio-list-file-replicas for the given dataset
std::vector< RucioListDidsEntry > rucioListDids(const std::string &dataset)
run rucio-list-dids for the given dataset
void ensureVomsProxy()
ensure that we have a valid VOMS proxy available
std::map< std::string, std::unique_ptr< MetaObject > > rucioGetMetadata(const std::set< std::string > &datasets)
run rucio-get-metadata for the given list of datasets
std::vector< std::string > rucioDirectAccessRegex(const std::string &name, const std::string &filter, const std::string &selectOptions)
list the rucio URLs for all the files in the dataset or dataset container matching the given filter (...
std::vector< std::string > rucioCacheDatasetGlob(const std::string &location, const std::string &dataset, const std::string &fileGlob)
download the dataset, and return a list matching the pattern
bool checkVomsProxy()
return whether we have a valid VOMS proxy available
std::vector< std::string > faxListFilesRegex(const std::string &name, const std::string &filter)
list the FAX URLs for all the files in the dataset or dataset container matching the given filter (as...
std::vector< std::string > rucioDirectAccessGlob(const std::string &name, const std::string &filter, const std::string &selectOptions)
list the rucio URLs for all the files in the dataset or dataset container matching the given filter (...
output
Definition merge.py:16
-diff
the result from rucio_download
Definition GridTools.h:175
one entry from the rucio-list-dids command
Definition GridTools.h:107
one entry from the rucio-list-file-replicas command
Definition GridTools.h:131
TFile * file