ATLAS Offline Software
Loading...
Searching...
No Matches
python.DiskUtils.FileSet Class Reference
Collaboration diagram for python.DiskUtils.FileSet:

Public Member Functions

 __init__ (self, iterator, backend)
 from_single_file (cls, path, backend=None)
 from_directory (cls, path, backend=None)
 from_file_containing_list (cls, path, backend=None)
 from_glob (cls, pattern, backend=None)
 from_ds_info (cls, run, project, stream, base, backend=None)
 from_input (cls, input_string, backend=None)
 __iter__ (self)
 strict_mode (self, setting=True)
 matching (self, pattern)
 excluding (self, pattern)
 use_files_from (self, path)
 only_existing (self, setting=True)
 only_latest (self, setting=True)
 only_single_dataset (self, setting=True)
 with_lumi_blocks (self, map_file=None)

Public Attributes

 backend = backend
list broken = []
dict lb_map = {}

Protected Member Functions

 _with_lumi_blocks_from_map (self, map_file)
 _with_lumi_blocks_from_ara (self)

Protected Attributes

 _iter = iterator
bool _existing = False
 _white_pattern = None
 _black_pattern = None
bool _strict = True
 _explicit = None
bool _dedup = False
bool _single_dataset = False

Detailed Description

 Represents a list of input files.
This class abstracts over the different ways files can be specified, and
the different storage backends/protocols on which they reside. It is an
iterator, and provides some methods for filtering the file set. E.g.:

    fs = FileSet.from_input('/eos/atlas/path/to/dataset/')
    for f in fs.matching(r'.*AOD.*').only_existing():
        print(f)

Definition at line 252 of file DiskUtils.py.

Constructor & Destructor Documentation

◆ __init__()

python.DiskUtils.FileSet.__init__ ( self,
iterator,
backend )

Definition at line 263 of file DiskUtils.py.

def __init__(self, iterator, backend):
    """Wrap an iterator of file names together with its storage backend.

    Every filter starts out disabled and strict mode starts out enabled.
    """
    # Where the file names come from and how they are accessed.
    self._iter = iterator
    self.backend = backend

    # Bookkeeping filled in by the lumi-block helpers.
    self.lb_map = {}
    self.broken = []

    # Filter settings consulted by __iter__.
    self._white_pattern = None
    self._black_pattern = None
    self._explicit = None
    self._existing = False
    self._dedup = False
    self._single_dataset = False
    self._strict = True

Member Function Documentation

◆ __iter__()

python.DiskUtils.FileSet.__iter__ ( self)

Definition at line 320 of file DiskUtils.py.

def __iter__(self):
    """Build the filtered iterator over this file set.

    The filters are chained in a fixed order: include pattern, exclude
    pattern, existence check, explicit file list, retry de-duplication and
    the single-dataset check.  Every surviving name is finally passed
    through ``self.backend.wrap``.

    All filters are lazy, so the errors below surface while iterating:

    Raises:
        AccessError: ``only_existing`` is set, strict mode is on and a file
            does not exist on the backend.
        FilterError: strict mode is on and some explicitly requested files
            were never seen, or files from more than one dataset were found
            while ``only_single_dataset`` is set.
    """
    it = self._iter
    backend = self.backend

    include = self._white_pattern
    if include:
        it = filter(include.search, it)

    exclude = self._black_pattern
    if exclude:
        it = (f for f in it if not exclude.search(f))

    if self._existing:  # see: only_existing
        if self._strict:
            def existing_or_raise(names):
                for f in names:
                    if not backend.exists(f):
                        raise AccessError('File not found: ' + f)
                    yield f
            it = existing_or_raise(it)
        else:
            it = filter(backend.exists, it)

    if self._explicit is not None:  # see: use_files_from
        def only_explicit(names, wanted, strict):
            # Tick names off a private copy: ``wanted`` may be a list (which
            # has no two-argument pop) and must survive repeated iteration.
            remaining = set(wanted)
            for f in names:
                name = os.path.basename(f)
                if name in remaining:
                    remaining.discard(name)
                    yield f
            if strict and remaining:
                for f in sorted(remaining):
                    print('Missing:', f)
                raise FilterError('Not all explicit files were found.')
        it = only_explicit(it, self._explicit, self._strict)

    if self._dedup:  # see: only_latest
        def latest_only(names):
            # Reduce over the already-filtered stream, not over ``self``:
            # iterating ``self`` here would re-enter __iter__ endlessly.
            latest = {}
            for f in names:
                stem, ext = os.path.splitext(f)
                # Keep the original suffix text so zero padding survives.
                if (stem not in latest
                        or int(ext[1:]) > int(latest[stem][1:])):
                    latest[stem] = ext
            for stem, ext in latest.items():
                yield stem + ext
        it = latest_only(it)

    if self._single_dataset:  # see: only_single_dataset
        def same_dataset(names):
            dataset = None
            for f in names:
                # The first three dot-separated fields identify the dataset.
                ds = '.'.join(f.split('.')[0:3])
                if dataset is None:
                    dataset = ds
                if ds != dataset:
                    raise FilterError(
                        "Files found from more than one dataset: '{}' != '{}'"
                        .format(ds, dataset))
                yield f
        it = same_dataset(it)

    return map(backend.wrap, it)
void print(char *figname, TCanvas *c1)
#define max(a, b)
Definition cfImp.cxx:41
STL class.
bool exists(const std::string &filename)
does a file exist
void search(TDirectory *td, const std::string &s, std::string cwd, node *n)
recursive directory search for TH1 and TH2 and TProfiles
Definition hcg.cxx:739

◆ _with_lumi_blocks_from_ara()

python.DiskUtils.FileSet._with_lumi_blocks_from_ara ( self)
protected

Definition at line 454 of file DiskUtils.py.

def _with_lumi_blocks_from_ara(self):
    """Pair each file with the lumi blocks reported by ``get_lumi_blocks``.

    Returns a generator of ``(file, set_of_lumi_blocks)`` tuples.  When
    ``get_lumi_blocks`` raises AccessError the error propagates in strict
    mode; otherwise the file is appended to ``self.broken`` and skipped.
    """
    def pairs(fileset):
        for path in fileset:
            try:
                blocks = get_lumi_blocks(path)
            except AccessError:
                if fileset._strict:
                    raise
                fileset.broken.append(path)
            else:
                yield path, set(blocks)
    return pairs(self)
STL class.

◆ _with_lumi_blocks_from_map()

python.DiskUtils.FileSet._with_lumi_blocks_from_map ( self,
map_file )
protected

Definition at line 434 of file DiskUtils.py.

def _with_lumi_blocks_from_map(self, map_file):
    """Pair each file with the lumi blocks listed for it in a map file.

    Every non-blank line of ``map_file`` holds a file name, whitespace, and
    a comma-separated list of integer lumi blocks.  The whole map is loaded
    into ``self.lb_map`` before the generator is returned.

    Returns a generator of ``(file, set_of_lumi_blocks)`` tuples, looked up
    by the base name of each file.  A file missing from the map raises
    KeyError in strict mode; otherwise it is appended to ``self.broken``
    and skipped.
    """
    with open(map_file) as mf:
        for line in mf:
            # Split once, on any whitespace, so tabs and repeated spaces
            # between the two columns are accepted.
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            fname, lb_list = fields[0], fields[1]
            self.lb_map[fname] = set(int(lb) for lb in lb_list.split(','))

    def generator(s):
        for f in s:
            try:
                yield f, s.lb_map[os.path.basename(f)]
            except KeyError:
                if s._strict:
                    raise
                s.broken.append(f)
    return generator(self)
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177

◆ excluding()

python.DiskUtils.FileSet.excluding ( self,
pattern )
Skip filenames matching the provided regular expression. 

Definition at line 394 of file DiskUtils.py.

def excluding(self, pattern):
    """Drop file names that the given regular expression finds a match in.

    A false-y ``pattern`` removes the exclusion filter.  Returns ``self``
    so calls can be chained.
    """
    compiled = None
    if pattern:
        compiled = re.compile(pattern)
    self._black_pattern = compiled
    return self

◆ from_directory()

python.DiskUtils.FileSet.from_directory ( cls,
path,
backend = None )

Definition at line 281 of file DiskUtils.py.

def from_directory(cls, path, backend=None):
    """Build a file set from the children of ``path`` on the backend.

    Falls back to a ``Local()`` backend when none is supplied.
    """
    storage = backend if backend else Local()
    listing = storage.children(path)
    return cls(listing, storage)

◆ from_ds_info()

python.DiskUtils.FileSet.from_ds_info ( cls,
run,
project,
stream,
base,
backend = None )

Definition at line 298 of file DiskUtils.py.

def from_ds_info(cls, run, project, stream, base, backend=None):
    """Build a file set from the directory ``base/project/stream/<run>``.

    The run number is converted to an integer and zero-padded to eight
    digits before being used as the last path component.
    """
    run_dir = '{:08d}'.format(int(run))
    dataset_dir = os.path.join(base, project, stream, run_dir)
    return cls.from_directory(dataset_dir, backend=backend)

◆ from_file_containing_list()

python.DiskUtils.FileSet.from_file_containing_list ( cls,
path,
backend = None )

Definition at line 286 of file DiskUtils.py.

def from_file_containing_list(cls, path, backend=None):
    """Build a file set from a text file that lists one input per line.

    Leading and trailing whitespace is stripped from every line.  Falls
    back to a ``Local()`` backend when none is supplied.
    """
    with open(path) as listing:
        names = [entry.strip() for entry in listing]
    return cls(iter(names), backend or Local())

◆ from_glob()

python.DiskUtils.FileSet.from_glob ( cls,
pattern,
backend = None )

Definition at line 293 of file DiskUtils.py.

def from_glob(cls, pattern, backend=None):
    """Build a file set from whatever the backend's ``glob`` returns.

    Falls back to a ``Local()`` backend when none is supplied.
    """
    storage = backend if backend else Local()
    matches = storage.glob(pattern)
    return cls(matches, storage)

◆ from_input()

python.DiskUtils.FileSet.from_input ( cls,
input_string,
backend = None )
Guess what kind of input file specification was provided. 

Definition at line 304 of file DiskUtils.py.

def from_input(cls, input_string, backend=None):
    """Guess what kind of input specification was given and dispatch on it.

    Checked in order: a directory on the backend, a local non-ROOT file
    (read as a list of inputs), a single file on the backend, and finally
    a glob pattern.

    Raises:
        AccessError: none of the above applies.
    """
    be = backend or Local()
    if be.is_directory(input_string):
        return cls.from_directory(input_string, be)

    # '.root' followed by exactly two more characters (e.g. '.root.1')
    # is treated as a ROOT file too.
    looks_like_root = (input_string.endswith('.root')
                       or input_string[-7:-2] == '.root')
    if Local().is_file(input_string) and not looks_like_root:
        return cls.from_file_containing_list(input_string, be)

    if be.is_file(input_string):
        return cls.from_single_file(input_string, be)

    if any(wildcard in input_string for wildcard in '*?['):
        return cls.from_glob(input_string, be)

    raise AccessError('Unable to resolve input: ' + repr(input_string))

◆ from_single_file()

python.DiskUtils.FileSet.from_single_file ( cls,
path,
backend = None )

Definition at line 277 of file DiskUtils.py.

def from_single_file(cls, path, backend=None):
    """Build a file set that contains only ``path``.

    Falls back to a ``Local()`` backend when none is supplied.
    """
    single = iter([path])
    return cls(single, backend or Local())

◆ matching()

python.DiskUtils.FileSet.matching ( self,
pattern )
Only accept filenames matching the provided regular expression. 

Definition at line 389 of file DiskUtils.py.

def matching(self, pattern):
    """Keep only file names that the given regular expression matches.

    A false-y ``pattern`` removes the inclusion filter.  Returns ``self``
    so calls can be chained.
    """
    compiled = None
    if pattern:
        compiled = re.compile(pattern)
    self._white_pattern = compiled
    return self

◆ only_existing()

python.DiskUtils.FileSet.only_existing ( self,
setting = True )
Only use existing files. 

Definition at line 408 of file DiskUtils.py.

def only_existing(self, setting=True):
    """Turn the existence check on the backend on or off.

    Returns ``self`` so calls can be chained.
    """
    self._existing = setting
    return self

◆ only_latest()

python.DiskUtils.FileSet.only_latest ( self,
setting = True )
Keep only the latest retry from sets like `*.1`, `*.2`. 

Definition at line 413 of file DiskUtils.py.

def only_latest(self, setting=True):
    """Turn retry de-duplication on or off.

    When enabled, only the latest retry of sets like `*.1`, `*.2` is kept.
    Returns ``self`` so calls can be chained.
    """
    self._dedup = setting
    return self

◆ only_single_dataset()

python.DiskUtils.FileSet.only_single_dataset ( self,
setting = True )
Require all files to be from the same dataset. 

Definition at line 418 of file DiskUtils.py.

def only_single_dataset(self, setting=True):
    """Turn the requirement that all files share one dataset on or off.

    Returns ``self`` so calls can be chained.
    """
    self._single_dataset = setting
    return self

◆ strict_mode()

python.DiskUtils.FileSet.strict_mode ( self,
setting = True )
 When strict, errors are raised in the following cases (which
otherwise cause the corresponding files to be silently skipped):

  * When LB info is requested but cannot be found for a file (because
    it was not in the map file, or we couldn't open the ROOT file).
  * When `only_existing` is set and a file is missing.
  * When a file list is provided and not all of the files it mentions
    were encountered by the end of iteration.

Definition at line 376 of file DiskUtils.py.

def strict_mode(self, setting=True):
    """Switch strict error handling on or off.

    In strict mode the following situations raise an error; otherwise the
    affected files are silently skipped:

      * LB info is requested but cannot be found for a file (it is not in
        the map file, or the ROOT file could not be opened).
      * `only_existing` is set and a file is missing.
      * A file list is provided and some of the files it mentions were not
        encountered by the end of iteration.

    Returns ``self`` so calls can be chained.
    """
    self._strict = setting
    return self

◆ use_files_from()

python.DiskUtils.FileSet.use_files_from ( self,
path )
Use specific filenames from within the provided dataset. 

Definition at line 399 of file DiskUtils.py.

def use_files_from(self, path):
    """Restrict the set to the file names listed in the text file ``path``.

    Names are read one per line with surrounding whitespace stripped.  A
    false-y ``path`` clears the restriction.  Returns ``self`` so calls can
    be chained.
    """
    if not path:
        self._explicit = None
        return self
    with open(path) as listing:
        self._explicit = [entry.strip() for entry in listing]
    return self

◆ with_lumi_blocks()

python.DiskUtils.FileSet.with_lumi_blocks ( self,
map_file = None )
 Lookup the luminosity blocks contained in each file.
If a map file is provided it will be queried for the LB mapping,
otherwise each file will be opened and accessed using AthenaROOTAccess
which can be a little slow.

Definition at line 423 of file DiskUtils.py.

def with_lumi_blocks(self, map_file=None):
    """Look up the luminosity blocks contained in each file.

    With a map file the LB mapping is read from it; without one, each file
    is opened and accessed using AthenaROOTAccess, which can be a little
    slow.
    """
    if not map_file:
        return self._with_lumi_blocks_from_ara()
    return self._with_lumi_blocks_from_map(map_file)

Member Data Documentation

◆ _black_pattern

python.DiskUtils.FileSet._black_pattern = None
protected

Definition at line 268 of file DiskUtils.py.

◆ _dedup

bool python.DiskUtils.FileSet._dedup = False
protected

Definition at line 271 of file DiskUtils.py.

◆ _existing

bool python.DiskUtils.FileSet._existing = False
protected

Definition at line 266 of file DiskUtils.py.

◆ _explicit

python.DiskUtils.FileSet._explicit = None
protected

Definition at line 270 of file DiskUtils.py.

◆ _iter

python.DiskUtils.FileSet._iter = iterator
protected

Definition at line 265 of file DiskUtils.py.

◆ _single_dataset

bool python.DiskUtils.FileSet._single_dataset = False
protected

Definition at line 272 of file DiskUtils.py.

◆ _strict

bool python.DiskUtils.FileSet._strict = True
protected

Definition at line 269 of file DiskUtils.py.

◆ _white_pattern

python.DiskUtils.FileSet._white_pattern = None
protected

Definition at line 267 of file DiskUtils.py.

◆ backend

python.DiskUtils.FileSet.backend = backend

Definition at line 264 of file DiskUtils.py.

◆ broken

list python.DiskUtils.FileSet.broken = []

Definition at line 273 of file DiskUtils.py.

◆ lb_map

dict python.DiskUtils.FileSet.lb_map = {}

Definition at line 274 of file DiskUtils.py.


The documentation for this class was generated from the following file: