12 from functools
import wraps
16 def wrapper(*args, **kwargs):
17 print(
'WARNING: [InDetBeamSpotExample.DiskUtils]',
18 '{}() is deprecated and will be removed'.
format( fn.__name__),
20 print(
'WARNING: ', message,
22 return fn(*args, **kwargs)
27 from collections
import namedtuple
# Immutable record describing how to talk to one storage system:
#   name   - short identifier of the storage system
#   prefix - protocol prefix that gets prepended to a path for remote access
#   cp     - name of the copy command
#   ls     - shell command template for a plain listing (path filled in via %s)
#   longls - shell command template for a long listing (path filled in via %s)
StorageManager = namedtuple('StorageManager', 'name prefix cp ls longls')

# CASTOR name server, accessed through xrootd.
CastorMgr = StorageManager(
    name='castor',
    prefix='root://castoratlas/',
    cp='xrdcp',
    ls='nsls %s',
    longls='nsls -l %s',
)

# RFIO: the plain and the long listing use the same command.
RFIOMgr = StorageManager(
    name='rfio',
    prefix='rfio:',
    cp='rfcp',
    ls='rfdir %s',
    longls='rfdir %s',
)

# EOS: the `eos` client is run inside a login shell with an explicit
# LD_LIBRARY_PATH.
EOSMgr = StorageManager(
    name='eos',
    prefix='root://eosatlas.cern.ch/',
    cp='xrdcp',
    ls='/bin/sh -l -c "LD_LIBRARY_PATH=/usr/lib64/ eos ls %s"',
    longls='/bin/sh -l -c "LD_LIBRARY_PATH=/usr/lib64/ eos ls -l %s"',
)

# Plain local file system: no prefix, standard Unix tools.
UnixMgr = StorageManager(
    name='unix',
    prefix='',
    cp='cp',
    ls='ls %s',
    longls='ls -l %s',
)
Rationalise a path, removing any storage prefix and ensuring a single leading slash
38 for p
in (
'root://castoratlas/',
'root://eosatlas.cern.ch/',
'rfio:',
'castor:'):
39 if path.startswith(p):
41 if path.startswith(
'//'):
43 if not path.startswith(
'/'):
49 @
deprecated(
"EOS is mounted on /eos with fuse, so you probably don't need this abstraction")
Return the StorageManager to deal with listing, copying and reading files from various storage systems
55 if name.startswith(
'/castor/'):
57 elif name.startswith(
'/eos/'):
62 @
deprecated(
"DiskUtils.FileSet replaces this functionality")
65 lists CASTOR/EOS name server directory/file entries.
66 If path is a directory, filelist lists the entries in the directory;
67 they are sorted alphabetically.
69 `files` specifies the CASTOR/EOS pathname.
70 `prefix` specifies the prefix one wants to prepend to the path found.
71 (e.g. prefix='root://castoratlas/' or 'root://eosatlas.cern.ch//')
If prefix=True, it will determine the prefix based on the pathname.
75 filelist('/castor/cern.ch/atlas/*')
76 filelist('/castor/cern.ch/atl*/foo?[bar]/*.pool.root.?')
77 filelist('/eos/atlas/*', prefix='root://eosatlas.cern.ch/')
78 filelist('/castor/cern.ch/atlas/*', prefix=True)
81 path, fname = os.path.split(files)
84 if ( path.count(
'*') > 0
or path.count(
'?') > 0
or
85 path.count(
'[') > 0
or path.count(
']') > 0 ) :
87 return sum([
ls(os.path.join(p,fname))
93 flist = subprocess.check_output(mgr.ls % path, shell=
True).
split()
94 except subprocess.CalledProcessError
as err:
98 if not (os.path.basename(files)
in [
'',
'*']):
99 pattern = fnmatch.translate(os.path.basename(files))
100 flist =
filter(
lambda x: re.search(pattern, x), flist)
103 if isinstance(prefix, str):
104 return [os.path.join(prefix+path, p)
for p
in flist]
106 return [os.path.join(mgr.prefix+path, p)
for p
in flist]
108 return [os.path.join(path, p)
for p
in flist]
110 @
deprecated(
"EOS is mounted on /eos with fuse, so you probably don't need this abstraction")
111 def ls(path, longls=False):
115 `longls` specifies long listing format
122 return subprocess.check_output(mgr.longls % path, shell=
True)
124 return subprocess.check_output(mgr.ls % path, shell=
True)
126 @
deprecated(
"EOS is mounted on /eos with fuse, so you probably don't need this abstraction")
127 def cp(src, dest='.'):
134 if srcmgr.cp ==
'xrdcp' or destmgr.cp ==
'xrdcp': cp =
'xrdcp'
136 return os.system(
'%s %s%s %s%s' %(cp, srcmgr.prefix, src, destmgr.prefix, dest))
145 from PyUtils.MetaReader
import read_metadata
148 return( md[root_file][
'lumiBlockNumbers'] )
149 except Exception
as e:
150 print(
"Failed to read MetaData will fall back to looping ",
repr(e))
153 from PyUtils.RootUtils
import import_root
155 f = root.TFile.Open(root_file,
'READ')
157 metadata= f.Get(
'CollectionMetadata')
if f
else None
161 key_name =
str(ctypes.c_char_p(metadata.Key).value)
162 assert key_name ==
'POOLCollectionID'
164 coll_tree = f.Get(
'POOLCollectionTree')
if f
else None
166 evtmax = coll_tree.GetEntries()
167 if evtmax
in (-1,
None):
170 for row
in range(evtmax):
171 if coll_tree.GetEntry(row) < 0:
173 lbn = coll_tree.LumiBlockN
176 return list( lumiblocks )
181 with open(path,
'w')
as mapfile:
182 for f, lbs
in file_set.with_lumi_blocks():
184 mapfile.write(
'{} {}\n'.
format(
186 ','.
join(
str(x)
for x
in lbs)))
def exists(self, path):
    """Placeholder for an existence check on `path`.

    This generic version has no implementation and always raises
    NotImplementedError.
    """
    raise NotImplementedError
def is_file(self, path):
    """Placeholder for a "is `path` a regular file?" check.

    This generic version has no implementation and always raises
    NotImplementedError.
    """
    raise NotImplementedError
def children(self, path):
    """Placeholder for enumerating the entries found under `path`.

    This generic version has no implementation and always raises
    NotImplementedError.
    """
    raise NotImplementedError
def glob(self, pattern):
    """Placeholder for expanding a wildcard `pattern` into matching paths.

    This generic version has no implementation and always raises
    NotImplementedError.
    """
    raise NotImplementedError
def wrap(self, path):
    """Return `path` exactly as given (the default wrapping is a no-op)."""
    wrapped = path
    return wrapped
def exists(self, path):
    """Return True if `path` exists according to `os.path.exists`."""
    found = os.path.exists(path)
    return found
def is_file(self, path):
    """Return True if `path` is a regular file according to `os.path.isfile`."""
    regular = os.path.isfile(path)
    return regular
204 for dir_name, dirs, files
in os.walk(p):
206 yield os.path.join(dir_name, f)
210 return glob.glob(pattern)
213 """ Accesses EOS using the command line interface.
214 NB: when EOS is fuse-mounted on /eos this class is not really necessary.
217 def __init__(self, prefix='root://eosatlas.cern.ch/
'):
221 if path.startswith(
'/'):
226 if path.startswith(self.
prefix):
227 path = path[len(self.
prefix):]
231 return self.
_call(
'eos',
'-b',
'ls',
'-s', self.
unwrap(path)) == 0
234 return self.
_call(
'eos',
'-b',
'sat',
'-f', self.
unwrap(path)) == 0
237 return self.
_call(
'eos',
'-b',
'sat',
'-d', self.
unwrap(path)) == 0
240 with open(os.devnull,
'w')
as null:
241 output = subprocess.check_output([
'eos',
'-b',
'find',
'-f',
242 self.
unwrap(path)], stderr=null)
243 return [l.strip()
for l
in output.split(
'\n')]
246 with open(os.devnull,
'w')
as null:
247 retcode = subprocess.call(args, stderr=null)
253 """ Represents a list of input files.
254 This class abstracts over the different ways files can be specified, and
255 the different storage backends/protocols on which they reside. It is an
256 iterator, and provides some methods for filtering the file set. E.g.:
258 fs = FileSet.from_input('/eos/atlas/path/to/dataset/')
259 for f in fs.matching(r'.*AOD.*').only_existing():
282 be = backend
or Local()
283 return cls(be.children(path), be)
287 with open(path)
as lf:
288 listtoiter = [l.strip()
for l
in lf.readlines()]
289 iterator =
iter(listtoiter)
290 return cls(iterator, backend
or Local())
294 be = backend
or Local()
295 return cls(be.glob(pattern), be)
299 path = os.path.join(base, project, stream,
300 '{:0{digits}d}'.
format(
int(run), digits=8))
305 ''' Guess what kind of input file specification was provided. '''
306 be = backend
or Local()
307 if be.is_directory(input_string):
309 elif Local().is_file(input_string)
and not (
310 input_string.endswith(
'.root')
or
311 input_string[-7:-2] ==
'.root'):
313 elif be.is_file(input_string):
315 elif '*' in input_string
or '?' in input_string
or '[' in input_string:
340 name = os.path.basename(f)
345 raise FilterError(
'Not all explicit files were found.')
349 name, ext = os.path.splitext(f)
356 for name, ext
in em.items():
357 yield '.'.
join([name, ext])
358 it =
generator(functools.reduce(fn, self, {}))
363 ds =
'.'.
join(f.split(
'.')[0:3])
370 "Files found from more than one dataset: '{}' != '{}'"
373 it = map(
lambda x: self.
backend.wrap(x), it)
377 """ When strict, errors are raised in the following cases (which
378 otherwise cause the corresponding files to be silently skipped):
380 * When LB info is requested but cannot be found for a file (because
381 it was not in the map file, or we couldn't open the ROOT file).
382 * When `only_existing` is set and a file is missing.
383 * When a file list is provided and not all of the files it mentions
384 were encountered by the end of iteration.
390 ''' Only accept filenames matching the provided regular expression. '''
395 ''' Skip filenames matching the provided regular expression. '''
400 ''' Use specific filenames from within the provided dataset. '''
402 with open(path)
as lf:
403 self.
_explicit = [l.strip()
for l
in lf.readlines()]
409 ''' Only use existing files. '''
414 ''' Keep only the latest retry from sets like `*.1`, `*.2`. '''
419 ''' Require all files to be from the same dataset. '''
424 """ Lookup the luminosity blocks contained in each file.
425 If a map file is provided it will be queried for the LB mapping,
426 otherwise each file will be opened and accessed using AthenaROOTAccess
427 which can be a little slow.
435 with open(map_file)
as mf:
438 fname = line.split(
' ')[0]
439 print(line.split(
' ')[0])
440 print(line.split(
' ')[1])
441 lbs =
set(
int(l)
for l
in line.split(
' ')[1].
split(
','))
446 yield f, s.lb_map[os.path.basename(f)]