ATLAS Offline Software
Loading...
Searching...
No Matches
DiskUtils.py
Go to the documentation of this file.
1# Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration
2
3import glob
4import os
5import re
6import subprocess
7import functools
8
9# DEPRECATED CODE #############################################################
10
11import sys
12from functools import wraps
def deprecated(message):
    """
    Build a decorator that marks a function as deprecated.

    Each call to the decorated function first writes two warning lines to
    standard error (the function's name, then `message`) and then forwards
    the call unchanged, returning whatever the wrapped function returns.

    `message` is the advice shown to the caller (e.g. what to use instead).
    """
    def deco(fn):
        @wraps(fn)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            print('WARNING: [InDetBeamSpotExample.DiskUtils]',
                  '{}() is deprecated and will be removed'.format(fn.__name__),
                  file=sys.stderr)
            print('WARNING: ', message,
                  file=sys.stderr)
            return fn(*args, **kwargs)
        return wrapper
    return deco
25
26import fnmatch
27from collections import namedtuple
# One record per supported storage system:
#   name   - short identifier of the system
#   prefix - string prepended to a path to address it on that system
#   cp     - name of the copy command
#   ls     - shell command template for a plain listing (one %s for the path)
#   longls - shell command template for a long listing (one %s for the path)
StorageManager = namedtuple('StorageManager', ['name', 'prefix', 'cp', 'ls', 'longls'])
CastorMgr = StorageManager(
    name='castor',
    prefix='root://castoratlas/',
    cp='xrdcp',
    ls='nsls %s',
    longls='nsls -l %s')
RFIOMgr = StorageManager(
    name='rfio',
    prefix='rfio:',
    cp='rfcp',
    ls='rfdir %s',
    longls='rfdir %s')
EOSMgr = StorageManager(
    name='eos',
    prefix='root://eosatlas.cern.ch/',
    cp='xrdcp',
    ls='/bin/sh -l -c "LD_LIBRARY_PATH=/usr/lib64/ eos ls %s"',
    longls='/bin/sh -l -c "LD_LIBRARY_PATH=/usr/lib64/ eos ls -l %s"')
UnixMgr = StorageManager(
    name='unix',
    prefix='',
    cp='cp',
    ls='ls %s',
    longls='ls -l %s')
33
34def _rationalise(path):
35 """
36 Rationalise a path, removing prefix and esuring single leading slash
37 """
38 for p in ('root://castoratlas/', 'root://eosatlas.cern.ch/', 'rfio:', 'castor:'):
39 if path.startswith(p):
40 path = path[len(p):]
41 if path.startswith('//'):
42 path = path[1:]
43 if not path.startswith('/'):
44 path = '/'+path
45 break
46
47 return path
48
@deprecated("EOS is mounted on /eos with fuse, so you probably don't need this abstraction")
def storageManager(name):
    """
    Return the StorageManager to deal with listing, copying and reading
    files from various storage systems.

    `name` is a path, optionally carrying a storage prefix. After the prefix
    is stripped, paths under /castor/ map to CastorMgr, paths under /eos/ map
    to EOSMgr, and everything else maps to UnixMgr.
    """
    name = _rationalise(name)
    if name.startswith('/castor/'):
        return CastorMgr
    elif name.startswith('/eos/'):
        return EOSMgr
    else:
        return UnixMgr
61
@deprecated("DiskUtils.FileSet replaces this functionality")
def filelist(files, prefix=None):
    """
    lists CASTOR/EOS name server directory/file entries.
    If path is a directory, filelist lists the entries in the directory;
    the order is whatever the underlying listing command prints.

    `files` specifies the CASTOR/EOS pathname.
    `prefix` specifies the prefix one wants to prepend to the path found.
             (e.g. prefix='root://castoratlas/' or 'root://eosatlas.cern.ch//')
             if prefix=True it will determine the prefix based on the pathname

    Returns a list of path strings; an empty list if the listing command
    fails for a wildcard-free directory.

    ex:
    filelist('/castor/cern.ch/atlas/*')
    filelist('/castor/cern.ch/atl*/foo?[bar]/*.pool.root.?')
    filelist('/eos/atlas/*', prefix='root://eosatlas.cern.ch/')
    filelist('/castor/cern.ch/atlas/*', prefix=True)
    """

    def entries(raw):
        # subprocess.check_output returns bytes under Python 3; decode so the
        # names can be matched and joined with str paths.
        if isinstance(raw, bytes):
            raw = raw.decode()
        return raw.split()

    path, fname = os.path.split(files)
    path = _rationalise(path)

    if any(c in path for c in '*?[]'):
        # Wildcards in the directory part: list the directory pattern first,
        # then list `fname` below every entry that came back.
        # NOTE(review): the results of this branch are returned as printed by
        # the listing command, without `prefix` applied - confirm intended.
        paths = entries(ls(path))
        return [entry
                for p in paths
                for entry in entries(ls(os.path.join(p, fname)))]

    mgr = storageManager(path)

    try:
        # NOTE: `path` is interpolated into a shell command line; do not pass
        # untrusted input.
        flist = entries(subprocess.check_output(mgr.ls % path, shell=True))
    except subprocess.CalledProcessError as err:
        print(err.output)
        return []

    if fname not in ('', '*'):  # otherwise no need to filter
        # fnmatchcase anchors the pattern at both ends of the name.
        flist = [x for x in flist if fnmatch.fnmatchcase(x, fname)]

    if prefix:
        if isinstance(prefix, str):
            base = prefix + path
        else:
            # prefix=True: use the prefix of the storage system found above
            base = mgr.prefix + path
    else:
        base = path
    return [os.path.join(base, p) for p in flist]
109
@deprecated("EOS is mounted on /eos with fuse, so you probably don't need this abstraction")
def ls(path, longls=False):
    """
    Simple list of files

    `path` is the path to list; a storage prefix, if present, is stripped
    and the listing command is chosen from the resulting path.
    `longls` specifies long listing format

    Returns the output of the listing command exactly as produced by
    subprocess.check_output (it is not split into entries).
    Raises subprocess.CalledProcessError if the command exits non-zero.
    """

    path = _rationalise(path)
    mgr = storageManager(path)

    template = mgr.longls if longls else mgr.ls
    # NOTE: `path` is interpolated into a shell command line; do not pass
    # untrusted input.
    return subprocess.check_output(template % path, shell=True)
125
@deprecated("EOS is mounted on /eos with fuse, so you probably don't need this abstraction")
def cp(src, dest='.'):
    """
    Copy `src` to `dest` using the copy command suited to where they live.

    Both paths have any storage prefix stripped and are then re-prefixed
    according to the storage system detected for each of them. `xrdcp` is
    used as soon as either side requires it, plain `cp` otherwise.

    Returns the value of os.system for the copy command.
    """
    src = _rationalise(src)
    dest = _rationalise(dest)
    srcmgr = storageManager(src)
    destmgr = storageManager(dest)

    command = 'xrdcp' if 'xrdcp' in (srcmgr.cp, destmgr.cp) else 'cp'

    # NOTE: the paths are interpolated into a shell command line; do not pass
    # untrusted input.
    return os.system('%s %s%s %s%s' % (command, srcmgr.prefix, src, destmgr.prefix, dest))
137
138
139
class AccessError(RuntimeError):
    """ Raised when an input cannot be resolved or a required file is not found. """
141
def get_lumi_blocks(root_file):
    """
    Return the luminosity block numbers recorded in a ROOT file.

    The file metadata is tried first (PyUtils.MetaReader.read_metadata). If
    that fails for any reason, the file is opened with ROOT and the
    `LumiBlockN` value of every entry of `POOLCollectionTree` is collected.

    `root_file` is the path or URL of the file.

    Returns the `lumiBlockNumbers` metadata entry when the metadata could be
    read, otherwise a list of the distinct values found (in no particular
    order).

    Raises AccessError if the fallback cannot open the file or finds an
    unexpected key in `CollectionMetadata`.
    """

    try:
        from PyUtils.MetaReader import read_metadata
        md = read_metadata(root_file)

        return md[root_file]['lumiBlockNumbers']
    except Exception as e:
        # Deliberate best effort: any metadata failure triggers the fallback.
        print( "Failed to read MetaData will fall back to looping ", repr(e))

    from PyUtils.RootUtils import import_root
    root = import_root()
    f = root.TFile.Open(root_file, 'READ')
    if not f:
        # Without this guard the cleanup below would fail on the missing
        # file handle and hide the real problem.
        raise AccessError('Unable to open ROOT file: ' + repr(root_file))

    try:
        lumiblocks = set()
        metadata = f.Get('CollectionMetadata')
        if metadata:
            metadata.GetEntry(0)
            import ctypes
            key_name = ctypes.c_char_p(metadata.Key).value
            if isinstance(key_name, bytes):
                # c_char_p yields bytes under Python 3; str() on bytes would
                # give "b'...'" and never compare equal.
                key_name = key_name.decode()
            if key_name != 'POOLCollectionID':
                raise AccessError('Unexpected CollectionMetadata key {!r} in {!r}'
                                  .format(key_name, root_file))
        del metadata
        coll_tree = f.Get('POOLCollectionTree')
        if coll_tree:
            evtmax = coll_tree.GetEntries()
            if evtmax in (-1, None):
                evtmax = 0
            for row in range(int(evtmax)):
                if coll_tree.GetEntry(row) < 0:
                    break
                lumiblocks.add(coll_tree.LumiBlockN)
        del coll_tree
        return list(lumiblocks)
    finally:
        f.Close()
179
def make_lumi_block_map_file(file_set, path):
    """
    Write a luminosity-block map file for the files in `file_set`.

    `file_set` must provide `with_lumi_blocks()`, yielding pairs of
    (file name, iterable of luminosity block numbers).
    `path` is the output file; it is overwritten.

    One line is written per file: the file's base name, a space, and the
    comma-separated luminosity block numbers in ascending order.
    """
    with open(path, 'w') as mapfile:
        for f, lbs in file_set.with_lumi_blocks():
            print('Reading:', f)
            mapfile.write('{} {}\n'.format(
                os.path.basename(f),
                # sorted: sets have no defined order, keep the file stable
                ','.join(str(x) for x in sorted(lbs))))
187
188
# NOTE(review): the `class` lines of the three backends below are absent from
# this listing. `Local` is the name used by FileSet; `Backend` and `EOS` are
# reconstructed names - confirm against the repository.
class Backend:
    """ Interface of a storage backend.
        Every query must be provided by a concrete backend; `wrap` turns a
        plain path into the form used to access it and is the identity here.
    """

    def exists(self, path): raise NotImplementedError
    def is_file(self, path): raise NotImplementedError
    def is_directory(self, path): raise NotImplementedError
    def children(self, path): raise NotImplementedError
    def glob(self, pattern): raise NotImplementedError
    def wrap(self, path): return path


class Local(Backend):
    """ Accesses files through the local file system. """

    def exists(self, path): return os.path.exists(path)
    def is_directory(self, path): return os.path.isdir(path)
    def is_file(self, path): return os.path.isfile(path)

    def children(self, path):
        """ Lazily yield every file below `path`, descending into subdirectories. """
        def generator(p):
            for dir_name, dirs, files in os.walk(p):
                for f in files:
                    yield os.path.join(dir_name, f)
        return generator(path)

    def glob(self, pattern):
        """ Return the list of paths matching the shell-style `pattern`. """
        return glob.glob(pattern)


class EOS(Backend):
    """ Accesses EOS using the command line interface.
        NB: when EOS is fuse-mounted on /eos this class is not really necessary.
    """

    def __init__(self, prefix='root://eosatlas.cern.ch/'):
        self.prefix = prefix

    def wrap(self, path):
        """ Prepend the prefix to absolute paths; leave other paths alone. """
        if path.startswith('/'):
            path = self.prefix + path
        return path

    def unwrap(self, path):
        """ Strip the prefix from `path` if it is present. """
        if path.startswith(self.prefix):
            path = path[len(self.prefix):]
        return path

    def exists(self, path):
        return self._call('eos', '-b', 'ls', '-s', self.unwrap(path)) == 0

    def is_file(self, path):
        # `stat -f` succeeds only for files
        return self._call('eos', '-b', 'stat', '-f', self.unwrap(path)) == 0

    def is_directory(self, path):
        # `stat -d` succeeds only for directories
        return self._call('eos', '-b', 'stat', '-d', self.unwrap(path)) == 0

    def children(self, path):
        """ Return the files reported by `eos find -f` for `path`.
            Raises subprocess.CalledProcessError if the command fails.
        """
        output = subprocess.check_output(
            ['eos', '-b', 'find', '-f', self.unwrap(path)],
            stderr=subprocess.DEVNULL,
            universal_newlines=True)  # text output, so lines are str
        # Skip blank lines: the output ends with a newline, which would
        # otherwise produce an empty file name.
        return [line.strip() for line in output.splitlines() if line.strip()]

    def _call(self, *args):
        """ Run a command with stderr discarded and return its exit code. """
        return subprocess.call(args, stderr=subprocess.DEVNULL)
249
class FilterError(RuntimeError):
    """ Raised when the files seen during iteration violate a requested filter. """
251
# NOTE(review): the `class` line is absent from this listing; the name is the
# one used in the docstring example below.
class FileSet:
    """ Represents a list of input files.
        This class abstracts over the different ways files can be specified, and
        the different storage backends/protocols on which they reside. It is an
        iterator, and provides some methods for filtering the file set. E.g.:

            fs = FileSet.from_input('/eos/atlas/path/to/dataset/')
            for f in fs.matching(r'.*AOD.*').only_existing():
                print(f)
    """

    def __init__(self, iterator, backend):
        """ `iterator` supplies the candidate file names; `backend` answers
            existence queries and wraps the names that are finally yielded.
        """
        self.backend = backend
        self._iter = iterator
        self._existing = False
        self._white_pattern = None
        self._black_pattern = None
        self._strict = True
        self._explicit = None
        self._dedup = False
        self._single_dataset = False
        self.broken = []   # files skipped for lack of LB info (non-strict mode)
        self.lb_map = {}   # file base name -> set of luminosity blocks

    @classmethod
    def from_single_file(cls, path, backend=None):
        return cls(iter([path]), backend or Local())

    @classmethod
    def from_directory(cls, path, backend=None):
        be = backend or Local()
        return cls(be.children(path), be)

    @classmethod
    def from_file_containing_list(cls, path, backend=None):
        ''' Read one file name per line from `path`; blank lines are ignored. '''
        with open(path) as lf:
            names = [name for name in (l.strip() for l in lf) if name]
        return cls(iter(names), backend or Local())

    @classmethod
    def from_glob(cls, pattern, backend=None):
        be = backend or Local()
        return cls(be.glob(pattern), be)

    @classmethod
    def from_ds_info(cls, run, project, stream, base, backend=None):
        ''' Use the directory <base>/<project>/<stream>/<run as 8 digits>. '''
        path = os.path.join(base, project, stream,
                            '{:0{digits}d}'.format(int(run), digits=8))
        return cls.from_directory(path, backend=backend)

    @classmethod
    def from_input(cls, input_string, backend=None):
        ''' Guess what kind of input file specification was provided. '''
        be = backend or Local()
        if be.is_directory(input_string):
            return cls.from_directory(input_string, be)
        elif Local().is_file(input_string) and not (
                input_string.endswith('.root') or
                input_string[-7:-2] == '.root'):
            # A local file that does not look like '*.root' or '*.root.N' is
            # taken to be a text file listing the inputs.
            return cls.from_file_containing_list(input_string, be)
        elif be.is_file(input_string):
            return cls.from_single_file(input_string, be)
        elif '*' in input_string or '?' in input_string or '[' in input_string:
            return cls.from_glob(input_string, be)
        else:
            raise AccessError('Unable to resolve input: ' + repr(input_string))

    def __iter__(self):
        """ Iterate over the file names that pass all configured filters,
            each wrapped by the backend.
        """
        it = self._iter
        if self._white_pattern:
            it = filter(lambda x: self._white_pattern.search(x), it)
        if self._black_pattern:
            it = filter(lambda x: not self._black_pattern.search(x), it)
        if self._existing: # see: only_existing
            if self._strict:
                it = self._require_existing(it, self.backend)
            else:
                it = filter(lambda x: self.backend.exists(x), it)
        if self._explicit is not None: # see: use_files_from
            it = self._select_explicit(it, self._explicit, self._strict)
        if self._dedup: # see: only_latest
            # Feed the filtered stream, not `self`: iterating `self` here
            # would re-enter __iter__ and recurse without end.
            it = self._keep_latest(it)
        if self._single_dataset: # see: only_single_dataset
            it = self._require_single_dataset(it)
        it = map(lambda x: self.backend.wrap(x), it)
        return it

    @staticmethod
    def _require_existing(files, backend):
        ''' Pass files through, raising AccessError at the first missing one. '''
        for f in files:
            if backend.exists(f):
                yield f
            else:
                raise AccessError('File not found: ' + f)

    @staticmethod
    def _select_explicit(files, wanted, strict):
        ''' Yield the first file seen for each base name in `wanted`. '''
        pending = set(wanted)  # private copy, so iterating again starts afresh
        for f in files:
            name = os.path.basename(f)
            if name in pending:
                pending.discard(name)
                yield f
        if strict and pending:
            for name in sorted(pending): print('Missing:', name)
            raise FilterError('Not all explicit files were found.')

    @staticmethod
    def _keep_latest(files):
        ''' For names differing only in a numeric last extension keep the highest.
            All input is read before anything is yielded. A name that occurs
            more than once must have integer last extensions (ValueError
            otherwise).
        '''
        latest = {}
        for f in files:
            name, ext = os.path.splitext(f)
            attempt = ext[1:]
            if name in latest:
                attempt = str(max(int(latest[name]), int(attempt)))
            latest[name] = attempt
        for name, attempt in latest.items():
            # a name without extension is passed through as it is
            yield '.'.join([name, attempt]) if attempt else name

    @staticmethod
    def _require_single_dataset(files):
        ''' Pass files through while their first three dot-separated fields agree. '''
        dataset = None
        for f in files:
            ds = '.'.join(f.split('.')[0:3])
            if dataset is None:
                dataset = ds
            if ds == dataset:
                yield f
            else:
                raise FilterError(
                    "Files found from more than one dataset: '{}' != '{}'"
                    .format(ds, dataset))

    def strict_mode(self, setting=True):
        """ When strict, errors are raised in the following cases (which
            otherwise cause the corresponding files to be silently skipped):

              * When LB info is requested but cannot be found for a file (because
                it was not in the map file, or we couldn't open the ROOT file).
              * When `only_existing` is set and a file is missing.
              * When a file list is provided and not all of the files it mentions
                were encountered by the end of iteration.
        """
        self._strict = setting
        return self

    def matching(self, pattern):
        ''' Only accept filenames matching the provided regular expression. '''
        self._white_pattern = re.compile(pattern) if pattern else None
        return self

    def excluding(self, pattern):
        ''' Skip filenames matching the provided regular expression. '''
        self._black_pattern = re.compile(pattern) if pattern else None
        return self

    def use_files_from(self, path):
        ''' Use specific filenames from within the provided dataset.
            `path` names a text file with one file base name per line (blank
            lines are ignored); a false value removes the restriction.
        '''
        if path:
            with open(path) as lf:
                self._explicit = [name for name in (l.strip() for l in lf) if name]
        else:
            self._explicit = None
        return self

    def only_existing(self, setting=True):
        ''' Only use existing files. '''
        self._existing = setting
        return self

    def only_latest(self, setting=True):
        ''' Keep only the latest retry from sets like `*.1`, `*.2`. '''
        self._dedup = setting
        return self

    def only_single_dataset(self, setting=True):
        ''' Require all files to be from the same dataset. '''
        self._single_dataset = setting
        return self

    def with_lumi_blocks(self, map_file=None):
        """ Lookup the luminosity blocks contained in each file.
            If a map file is provided it will be queried for the LB mapping,
            otherwise each file will be read with `get_lumi_blocks`, which
            can be a little slow.

            Returns a generator of (file name, set of luminosity blocks).
        """
        if map_file:
            return self._with_lumi_blocks_from_map(map_file)
        else:
            return self._with_lumi_blocks_from_ara()

    def _with_lumi_blocks_from_map(self, map_file):
        ''' Read `map_file` into `lb_map`, then pair each file with its entry.
            Each line holds a file base name, whitespace, and a comma-separated
            list of luminosity blocks (which may be empty).
        '''
        with open(map_file) as mf:
            for line in mf:
                fields = line.split()
                if not fields:
                    continue  # blank line
                lbs = set()
                if len(fields) > 1:
                    lbs = set(int(lb) for lb in fields[1].split(',') if lb)
                self.lb_map[fields[0]] = lbs
        def generator(s):
            for f in s:
                try:
                    lbs = s.lb_map[os.path.basename(f)]
                except KeyError:
                    if s._strict:
                        raise
                    s.broken.append(f)
                    continue
                yield f, lbs
        return generator(self)

    def _with_lumi_blocks_from_ara(self):
        ''' Pair each file with the luminosity blocks read from the file itself. '''
        def generator(s):
            for f in s:
                try:
                    lbs = get_lumi_blocks(f)
                except AccessError:
                    if s._strict:
                        raise
                    else:
                        s.broken.append(f)
                        continue
                yield f, set(lbs)
        return generator(self)
void print(char *figname, TCanvas *c1)
#define max(a, b)
Definition cfImp.cxx:41
STL class.
glob(self, pattern)
Definition DiskUtils.py:194
is_file(self, path)
Definition DiskUtils.py:233
__init__(self, prefix='root://eosatlas.cern.ch/')
Definition DiskUtils.py:217
is_directory(self, path)
Definition DiskUtils.py:236
exists(self, path)
Definition DiskUtils.py:230
children(self, path)
Definition DiskUtils.py:239
unwrap(self, path)
Definition DiskUtils.py:225
_call(self, *args)
Definition DiskUtils.py:245
use_files_from(self, path)
Definition DiskUtils.py:399
from_directory(cls, path, backend=None)
Definition DiskUtils.py:281
__init__(self, iterator, backend)
Definition DiskUtils.py:263
only_existing(self, setting=True)
Definition DiskUtils.py:408
only_latest(self, setting=True)
Definition DiskUtils.py:413
strict_mode(self, setting=True)
Definition DiskUtils.py:376
with_lumi_blocks(self, map_file=None)
Definition DiskUtils.py:423
from_input(cls, input_string, backend=None)
Definition DiskUtils.py:304
excluding(self, pattern)
Definition DiskUtils.py:394
_with_lumi_blocks_from_map(self, map_file)
Definition DiskUtils.py:434
from_ds_info(cls, run, project, stream, base, backend=None)
Definition DiskUtils.py:298
only_single_dataset(self, setting=True)
Definition DiskUtils.py:418
matching(self, pattern)
Definition DiskUtils.py:389
from_file_containing_list(cls, path, backend=None)
Definition DiskUtils.py:286
from_single_file(cls, path, backend=None)
Definition DiskUtils.py:277
from_glob(cls, pattern, backend=None)
Definition DiskUtils.py:293
glob(self, pattern)
Definition DiskUtils.py:209
is_directory(self, path)
Definition DiskUtils.py:199
STL class.
bool exists(const std::string &filename)
does a file exist
void search(TDirectory *td, const std::string &s, std::string cwd, node *n)
recursive directory search for TH1 and TH2 and TProfiles
Definition hcg.cxx:739
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
storageManager(name)
Definition DiskUtils.py:50
make_lumi_block_map_file(file_set, path)
Definition DiskUtils.py:180
cp(src, dest='.')
Definition DiskUtils.py:127
get_lumi_blocks(root_file)
Definition DiskUtils.py:142
filelist(files, prefix=None)
Definition DiskUtils.py:63
ls(path, longls=False)
Definition DiskUtils.py:111
deprecated(message)
Definition DiskUtils.py:13