ATLAS Offline Software
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | List of all members
python.DiskUtils.FileSet Class Reference
Collaboration diagram for python.DiskUtils.FileSet:

Public Member Functions

def __init__ (self, iterator, backend)
 
def from_single_file (cls, path, backend=None)
 
def from_directory (cls, path, backend=None)
 
def from_file_containing_list (cls, path, backend=None)
 
def from_glob (cls, pattern, backend=None)
 
def from_ds_info (cls, run, project, stream, base, backend=None)
 
def from_input (cls, input_string, backend=None)
 
def __iter__ (self)
 
def strict_mode (self, setting=True)
 
def matching (self, pattern)
 
def excluding (self, pattern)
 
def use_files_from (self, path)
 
def only_existing (self, setting=True)
 
def only_latest (self, setting=True)
 
def only_single_dataset (self, setting=True)
 
def with_lumi_blocks (self, map_file=None)
 

Public Attributes

 backend
 
 broken
 
 lb_map
 

Private Member Functions

def _with_lumi_blocks_from_map (self, map_file)
 
def _with_lumi_blocks_from_ara (self)
 

Private Attributes

 _iter
 
 _existing
 
 _white_pattern
 
 _black_pattern
 
 _strict
 
 _explicit
 
 _dedup
 
 _single_dataset
 

Detailed Description

Represents a list of input files.
This class abstracts over the different ways files can be specified, and
the different storage backends/protocols on which they reside. It is
iterable, and provides some chainable methods for filtering the file set. E.g.:

    fs = FileSet.from_input('/eos/atlas/path/to/dataset/')
    for f in fs.matching(r'.*AOD.*').only_existing():
        print(f)

Definition at line 281 of file DiskUtils.py.

Constructor & Destructor Documentation

◆ __init__()

def python.DiskUtils.FileSet.__init__ (   self,
  iterator,
  backend 
)

Definition at line 292 of file DiskUtils.py.

def __init__(self, iterator, backend):
    """Create a file set that reads paths from ``iterator`` via ``backend``.

    iterator -- source of file paths; it is consumed by ``__iter__``.
    backend  -- storage backend object; ``__iter__`` calls its ``exists``
                and ``wrap`` methods.

    Every filter starts out disabled; strict mode starts out enabled.
    """
    # Inputs.
    self._iter = iterator
    self.backend = backend

    # Filter configuration, changed through the chainable setter methods.
    self._white_pattern = None     # see: matching
    self._black_pattern = None     # see: excluding
    self._explicit = None          # see: use_files_from
    self._existing = False         # see: only_existing
    self._dedup = False            # see: only_latest
    self._single_dataset = False   # see: only_single_dataset
    self._strict = True            # see: strict_mode

    # Filled in while looking up luminosity blocks.
    self.broken = []   # files whose LB lookup failed in non-strict mode
    self.lb_map = {}   # file name -> set of luminosity block numbers
304 

Member Function Documentation

◆ __iter__()

def python.DiskUtils.FileSet.__iter__ (   self)

Definition at line 349 of file DiskUtils.py.

def __iter__(self):
    """Return an iterator over the paths that pass every enabled filter.

    The stages are chained in this order: include pattern, exclude
    pattern, existence check, explicit file list, latest-retry
    de-duplication, single-dataset check.  Each surviving path is finally
    passed through ``self.backend.wrap``.

    Raises (while the returned iterator is being consumed):
      AccessError -- ``only_existing`` is set, strict mode is on and a
                     file does not exist.
      FilterError -- strict mode is on and some explicitly requested file
                     names were never encountered; or ``only_single_dataset``
                     is set and files from two datasets were seen.
    """
    backend = self.backend
    it = self._iter

    if self._white_pattern:
        it = filter(self._white_pattern.search, it)

    if self._black_pattern:
        black = self._black_pattern
        it = (f for f in it if not black.search(f))

    if self._existing:  # see: only_existing
        if self._strict:
            def require_existing(files):
                for f in files:
                    if not backend.exists(f):
                        raise AccessError('File not found: ' + f)
                    yield f
            it = require_existing(it)
        else:
            it = filter(backend.exists, it)

    if self._explicit is not None:  # see: use_files_from
        def only_explicit(files, wanted, strict):
            # Work on a private set so that this stage accepts any
            # collection of names (list or mapping) and does not consume
            # the configured selection.
            pending = set(wanted)
            for f in files:
                name = os.path.basename(f)
                if name in pending:
                    pending.remove(name)
                    yield f
            if strict and pending:
                for name in sorted(pending):
                    print('Missing:', name)
                raise FilterError('Not all explicit files were found.')
        it = only_explicit(it, self._explicit, self._strict)

    if self._dedup:  # see: only_latest
        def latest_retry(files):
            # This stage must consume the pipeline built so far; iterating
            # over ``self`` here would re-enter __iter__ and never end.
            best = {}
            for f in files:
                stem, ext = os.path.splitext(f)
                suffix = ext[1:]
                if suffix.isdecimal():
                    key, attempt = stem, int(suffix)
                else:
                    # No numeric retry suffix: keep the name untouched.
                    key, attempt = f, -1
                if key not in best or attempt > best[key][0]:
                    best[key] = (attempt, f)
            for _, f in best.values():
                yield f
        it = latest_retry(it)

    if self._single_dataset:  # see: only_single_dataset
        def same_dataset(files):
            dataset = None
            for f in files:
                # The first three dot-separated fields identify the dataset.
                ds = '.'.join(f.split('.')[0:3])
                if dataset is None:
                    dataset = ds
                if ds != dataset:
                    raise FilterError(
                        "Files found from more than one dataset: '{}' != '{}'"
                        .format(ds, dataset))
                yield f
        it = same_dataset(it)

    return map(backend.wrap, it)
404 

◆ _with_lumi_blocks_from_ara()

def python.DiskUtils.FileSet._with_lumi_blocks_from_ara (   self)
private

Definition at line 483 of file DiskUtils.py.

def _with_lumi_blocks_from_ara(self):
    """Yield ``(file, set_of_lumi_blocks)`` pairs using ``get_lumi_blocks``.

    Each file of this set is handed to ``get_lumi_blocks``.  If that
    raises ``AccessError`` the error is re-raised in strict mode;
    otherwise the file is recorded in ``self.broken`` and skipped.
    """
    def pairs(fileset):
        for path in fileset:
            try:
                blocks = get_lumi_blocks(path)
            except AccessError:
                if not fileset._strict:
                    # Best effort: remember the unreadable file and go on.
                    fileset.broken.append(path)
                    continue
                raise
            yield path, set(blocks)
    return pairs(self)

◆ _with_lumi_blocks_from_map()

def python.DiskUtils.FileSet._with_lumi_blocks_from_map (   self,
  map_file 
)
private

Definition at line 463 of file DiskUtils.py.

def _with_lumi_blocks_from_map(self, map_file):
    """Yield ``(file, set_of_lumi_blocks)`` pairs looked up in a map file.

    map_file -- path of a text file with one entry per line:
                ``<file name> <lb>,<lb>,...`` separated by whitespace.
                Blank lines are ignored.

    The parsed entries are stored in ``self.lb_map`` (file name -> set of
    ints) before the generator is returned.  Files are looked up by their
    base name.  A file without an entry raises ``KeyError`` in strict
    mode; otherwise it is appended to ``self.broken`` and skipped.
    """
    with open(map_file) as mf:
        for line in mf:
            fields = line.split()
            if not fields:
                continue  # blank line
            fname, lb_spec = fields[0], fields[1]
            self.lb_map[fname] = {int(lb) for lb in lb_spec.split(',')}

    def lookup(fileset):
        for f in fileset:
            try:
                lbs = fileset.lb_map[os.path.basename(f)]
            except KeyError:
                if fileset._strict:
                    raise
                fileset.broken.append(f)
                continue
            yield f, lbs
    return lookup(self)
482 

◆ excluding()

def python.DiskUtils.FileSet.excluding (   self,
  pattern 
)
Skip filenames matching the provided regular expression. 

Definition at line 423 of file DiskUtils.py.

def excluding(self, pattern):
    """Drop file names in which the regular expression ``pattern`` is found.

    A false ``pattern`` (``None`` or ``''``) removes the exclusion filter.
    Returns ``self`` so that calls can be chained.
    """
    if pattern:
        self._black_pattern = re.compile(pattern)
    else:
        self._black_pattern = None
    return self
427 

◆ from_directory()

def python.DiskUtils.FileSet.from_directory (   cls,
  path,
  backend = None 
)

Definition at line 310 of file DiskUtils.py.

def from_directory(cls, path, backend=None):
    """Build a file set from the children of the directory ``path``.

    The listing is obtained from ``backend.children(path)``; a ``Local``
    backend is created when no backend is supplied.
    NOTE(review): takes ``cls`` -- presumably decorated with
    ``@classmethod`` outside this listing; confirm.
    """
    storage = backend if backend else Local()
    listing = storage.children(path)
    return cls(listing, storage)
313 

◆ from_ds_info()

def python.DiskUtils.FileSet.from_ds_info (   cls,
  run,
  project,
  stream,
  base,
  backend = None 
)

Definition at line 327 of file DiskUtils.py.

def from_ds_info(cls, run, project, stream, base, backend=None):
    """Build a file set from ``<base>/<project>/<stream>/<run>``.

    ``run`` is converted with ``int`` and zero-padded to eight digits to
    form the last path component; the resulting directory is handed to
    ``cls.from_directory``.
    """
    run_dir = '{:08d}'.format(int(run))
    dataset_path = os.path.join(base, project, stream, run_dir)
    return cls.from_directory(dataset_path, backend=backend)
331 

◆ from_file_containing_list()

def python.DiskUtils.FileSet.from_file_containing_list (   cls,
  path,
  backend = None 
)

Definition at line 315 of file DiskUtils.py.

def from_file_containing_list(cls, path, backend=None):
    """Build a file set from a local text file listing one file per line.

    path    -- text file to read; surrounding whitespace is stripped from
               every line and blank lines are ignored, so a trailing empty
               line does not turn into an empty file name.
    backend -- backend for the listed files; a ``Local`` backend is
               created when none is supplied.
    """
    with open(path) as lf:
        stripped = [line.strip() for line in lf]
    names = [name for name in stripped if name]
    return cls(iter(names), backend or Local())
320 

◆ from_glob()

def python.DiskUtils.FileSet.from_glob (   cls,
  pattern,
  backend = None 
)

Definition at line 322 of file DiskUtils.py.

def from_glob(cls, pattern, backend=None):
    """Build a file set from the paths ``backend.glob(pattern)`` returns.

    A ``Local`` backend is created when no backend is supplied.
    """
    storage = backend if backend else Local()
    matches = storage.glob(pattern)
    return cls(matches, storage)
325 

◆ from_input()

def python.DiskUtils.FileSet.from_input (   cls,
  input_string,
  backend = None 
)
Guess what kind of input file specification was provided. 

Definition at line 333 of file DiskUtils.py.

def from_input(cls, input_string, backend=None):
    """Guess the kind of input specification and build the file set.

    The checks are tried in this order:
      1. a directory on the backend          -> ``from_directory``
      2. a local file whose name does not end in ``.root`` (optionally
         followed by two more characters)    -> ``from_file_containing_list``
      3. a file on the backend               -> ``from_single_file``
      4. a string containing ``*``, ``?`` or ``[`` -> ``from_glob``

    Raises ``AccessError`` when none of them applies.
    """
    be = backend or Local()

    if be.is_directory(input_string):
        return cls.from_directory(input_string, be)

    # The list-file test always uses the local file system, whatever the
    # backend of the data files is.
    if Local().is_file(input_string):
        looks_like_root = (input_string.endswith('.root')
                           or input_string[-7:-2] == '.root')
        if not looks_like_root:
            return cls.from_file_containing_list(input_string, be)

    if be.is_file(input_string):
        return cls.from_single_file(input_string, be)

    if any(wildcard in input_string for wildcard in ('*', '?', '[')):
        return cls.from_glob(input_string, be)

    raise AccessError('Unable to resolve input: ' + repr(input_string))
348 

◆ from_single_file()

def python.DiskUtils.FileSet.from_single_file (   cls,
  path,
  backend = None 
)

Definition at line 306 of file DiskUtils.py.

def from_single_file(cls, path, backend=None):
    """Build a file set that contains only ``path``.

    A ``Local`` backend is created when no backend is supplied.
    """
    storage = backend or Local()
    single = iter([path])
    return cls(single, storage)
308 

◆ matching()

def python.DiskUtils.FileSet.matching (   self,
  pattern 
)
Only accept filenames matching the provided regular expression. 

Definition at line 418 of file DiskUtils.py.

def matching(self, pattern):
    """Keep only file names in which the regular expression ``pattern`` is found.

    A false ``pattern`` (``None`` or ``''``) removes the inclusion filter.
    Returns ``self`` so that calls can be chained.
    """
    if pattern:
        self._white_pattern = re.compile(pattern)
    else:
        self._white_pattern = None
    return self
422 

◆ only_existing()

def python.DiskUtils.FileSet.only_existing (   self,
  setting = True 
)
Only use existing files. 

Definition at line 437 of file DiskUtils.py.

def only_existing(self, setting=True):
    """Enable (or disable) the check that every file exists on the backend.

    Returns ``self`` so that calls can be chained.
    """
    self._existing = setting
    return self
441 

◆ only_latest()

def python.DiskUtils.FileSet.only_latest (   self,
  setting = True 
)
Keep only the latest retry from sets like `*.1`, `*.2`. 

Definition at line 442 of file DiskUtils.py.

def only_latest(self, setting=True):
    """Enable (or disable) keeping only the latest retry of `*.1`, `*.2`, ...

    Returns ``self`` so that calls can be chained.
    """
    self._dedup = setting
    return self
446 

◆ only_single_dataset()

def python.DiskUtils.FileSet.only_single_dataset (   self,
  setting = True 
)
Require all files to be from the same dataset. 

Definition at line 447 of file DiskUtils.py.

def only_single_dataset(self, setting=True):
    """Enable (or disable) the requirement that all files share one dataset.

    Returns ``self`` so that calls can be chained.
    """
    self._single_dataset = setting
    return self
451 

◆ strict_mode()

def python.DiskUtils.FileSet.strict_mode (   self,
  setting = True 
)
When strict, errors are raised in the following cases (which
otherwise cause the corresponding files to be silently skipped):

  * When LB info is requested but cannot be found for a file (because
    it was not in the map file, or we couldn't open the ROOT file).
  * When `only_existing` is set and a file is missing.
  * When a file list is provided and not all of the files it mentions
    were encountered by the end of iteration.

Definition at line 405 of file DiskUtils.py.

def strict_mode(self, setting=True):
    """Switch strict error handling on or off; returns ``self`` for chaining.

    In strict mode the situations below raise an error.  With strict mode
    off, the affected files are skipped without an error instead:

      * LB info is requested but unavailable for a file (it is absent
        from the map file, or the ROOT file could not be opened).
      * `only_existing` is set and a file is missing.
      * A file list was provided and some of the files it mentions had
        not been encountered by the end of iteration.
    """
    self._strict = setting
    return self
417 

◆ use_files_from()

def python.DiskUtils.FileSet.use_files_from (   self,
  path 
)
Use specific filenames from within the provided dataset. 

Definition at line 428 of file DiskUtils.py.

def use_files_from(self, path):
    """Restrict the set to the file names listed in the text file ``path``.

    path -- file with one file name per line; surrounding whitespace is
            stripped and blank lines are ignored.  A false ``path``
            removes the restriction.

    The selection is stored in ``self._explicit`` as a mapping keyed by
    file name, so that entries can be looked up and removed by name with
    ``pop(name, default)``; a plain list does not support that call.
    Returns ``self`` so that calls can be chained.
    """
    if path:
        with open(path) as lf:
            names = [line.strip() for line in lf]
        self._explicit = {name: True for name in names if name}
    else:
        self._explicit = None
    return self
436 

◆ with_lumi_blocks()

def python.DiskUtils.FileSet.with_lumi_blocks (   self,
  map_file = None 
)
Lookup the luminosity blocks contained in each file.
If a map file is provided it will be queried for the LB mapping,
otherwise each file will be opened and accessed using AthenaROOTAccess
which can be a little slow.

Definition at line 452 of file DiskUtils.py.

def with_lumi_blocks(self, map_file=None):
    """Pair each file with the luminosity blocks it contains.

    When ``map_file`` is given, the LB mapping is read from that file.
    Otherwise every file is opened and inspected through AthenaROOTAccess,
    which can be a little slow.
    """
    if not map_file:
        return self._with_lumi_blocks_from_ara()
    return self._with_lumi_blocks_from_map(map_file)
462 

Member Data Documentation

◆ _black_pattern

python.DiskUtils.FileSet._black_pattern
private

Definition at line 297 of file DiskUtils.py.

◆ _dedup

python.DiskUtils.FileSet._dedup
private

Definition at line 300 of file DiskUtils.py.

◆ _existing

python.DiskUtils.FileSet._existing
private

Definition at line 295 of file DiskUtils.py.

◆ _explicit

python.DiskUtils.FileSet._explicit
private

Definition at line 299 of file DiskUtils.py.

◆ _iter

python.DiskUtils.FileSet._iter
private

Definition at line 294 of file DiskUtils.py.

◆ _single_dataset

python.DiskUtils.FileSet._single_dataset
private

Definition at line 301 of file DiskUtils.py.

◆ _strict

python.DiskUtils.FileSet._strict
private

Definition at line 298 of file DiskUtils.py.

◆ _white_pattern

python.DiskUtils.FileSet._white_pattern
private

Definition at line 296 of file DiskUtils.py.

◆ backend

python.DiskUtils.FileSet.backend

Definition at line 293 of file DiskUtils.py.

◆ broken

python.DiskUtils.FileSet.broken

Definition at line 302 of file DiskUtils.py.

◆ lb_map

python.DiskUtils.FileSet.lb_map

Definition at line 303 of file DiskUtils.py.


The documentation for this class was generated from the following file: DiskUtils.py
max
#define max(a, b)
Definition: cfImp.cxx:41
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.Bindings.__iter__
__iter__
Definition: Control/AthenaPython/python/Bindings.py:791
CaloClusterListBadChannel.cls
cls
Definition: CaloClusterListBadChannel.py:8
search
void search(TDirectory *td, const std::string &s, std::string cwd, node *n)
recursive directory search for TH1 and TH2 and TProfiles
Definition: hcg.cxx:738
covarianceTool.filter
filter
Definition: covarianceTool.py:514
python.getCurrentFolderTag.fn
fn
Definition: getCurrentFolderTag.py:65
PyAthena::repr
std::string repr(PyObject *o)
returns the string representation of a python object equivalent of calling repr(o) in python
Definition: PyAthenaUtils.cxx:106
python.DiskUtils.get_lumi_blocks
def get_lumi_blocks(root_file)
Definition: DiskUtils.py:143
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.processes.powheg.ZZ.ZZ.__init__
def __init__(self, base_directory, **kwargs)
Constructor: all process options are set here.
Definition: ZZ.py:18
Trk::open
@ open
Definition: BinningType.h:40
mc.generator
generator
Configure Herwig7 These are the commands corresponding to what would go into the regular Herwig infil...
Definition: mc.MGH7_FxFx_H71-DEFAULT_test.py:18
str
Definition: BTagTrackIpAccessor.cxx:11
dbg::print
void print(std::FILE *stream, std::format_string< Args... > fmt, Args &&... args)
Definition: SGImplSvc.cxx:70
python.dummyaccess.exists
def exists(filename)
Definition: dummyaccess.py:9
Trk::split
@ split
Definition: LayerMaterialProperties.h:38