ATLAS Offline Software
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | List of all members
python.DiskUtils.FileSet Class Reference
Collaboration diagram for python.DiskUtils.FileSet:

Public Member Functions

def __init__ (self, iterator, backend)
 
def from_single_file (cls, path, backend=None)
 
def from_directory (cls, path, backend=None)
 
def from_file_containing_list (cls, path, backend=None)
 
def from_glob (cls, pattern, backend=None)
 
def from_ds_info (cls, run, project, stream, base, backend=None)
 
def from_input (cls, input_string, backend=None)
 
def __iter__ (self)
 
def strict_mode (self, setting=True)
 
def matching (self, pattern)
 
def excluding (self, pattern)
 
def use_files_from (self, path)
 
def only_existing (self, setting=True)
 
def only_latest (self, setting=True)
 
def only_single_dataset (self, setting=True)
 
def with_lumi_blocks (self, map_file=None)
 

Public Attributes

 backend
 
 broken
 
 lb_map
 

Private Member Functions

def _with_lumi_blocks_from_map (self, map_file)
 
def _with_lumi_blocks_from_ara (self)
 

Private Attributes

 _iter
 
 _existing
 
 _white_pattern
 
 _black_pattern
 
 _strict
 
 _explicit
 
 _dedup
 
 _single_dataset
 

Detailed Description

Represents a list of input files.
This class abstracts over the different ways files can be specified, and
the different storage backends/protocols on which they reside. It is
iterable, and provides chainable methods for filtering the file set. E.g.:

    fs = FileSet.from_input('/eos/atlas/path/to/dataset/')
    for f in fs.matching(r'.*AOD.*').only_existing():
        print(f)

Definition at line 280 of file DiskUtils.py.

Constructor & Destructor Documentation

◆ __init__()

def python.DiskUtils.FileSet.__init__ (   self,
  iterator,
  backend 
)

Definition at line 291 of file DiskUtils.py.

def __init__(self, iterator, backend):
    """ Create an unfiltered file set.

    iterator -- iterable/iterator producing the candidate file names.
    backend  -- storage backend object; stored as `self.backend` and used
                during iteration (`exists`, `wrap`).

    All filters start switched off and strict mode starts switched on; use
    the chainable configuration methods to change that.
    """
    # Where the names come from and which storage layer they live on.
    self._iter = iterator
    self.backend = backend

    # Filter configuration (nothing is filtered by default).
    self._white_pattern = self._black_pattern = None
    self._explicit = None
    self._existing = self._dedup = self._single_dataset = False

    # Strict mode is the default: problems raise instead of being skipped.
    self._strict = True

    # Book-keeping filled in by the luminosity-block helpers.
    self.broken = []
    self.lb_map = {}
303 

Member Function Documentation

◆ __iter__()

def python.DiskUtils.FileSet.__iter__ (   self)

Definition at line 348 of file DiskUtils.py.

def __iter__(self):
    """ Iterate over the file names that pass every configured filter.

    The filters are chained in this order: `matching`, `excluding`,
    `only_existing`, `use_files_from`, `only_latest`,
    `only_single_dataset`.  Every surviving name is finally passed through
    `self.backend.wrap`.

    Raises (while the returned iterator is consumed):
      * AccessError -- strict mode, `only_existing` is set and a file is
        missing.
      * FilterError -- strict mode and a name given via `use_files_from`
        was never encountered; or `only_single_dataset` is set and files
        from more than one dataset are seen.
    """
    it = self._iter
    backend = self.backend

    white = self._white_pattern
    if white:
        it = (f for f in it if white.search(f))

    black = self._black_pattern
    if black:
        it = (f for f in it if not black.search(f))

    if self._existing:  # see: only_existing
        if self._strict:
            def require_existing(names):
                for f in names:
                    if not backend.exists(f):
                        raise AccessError('File not found: ' + f)
                    yield f
            it = require_existing(it)
        else:
            it = (f for f in it if backend.exists(f))

    if self._explicit is not None:  # see: use_files_from
        def only_explicit(names, pending, strict):
            for f in names:
                name = os.path.basename(f)
                if name in pending:
                    # Each requested name is honoured at most once.
                    del pending[name]
                    yield f
            if strict and pending:
                for name in pending:
                    print('Missing:', name)
                raise FilterError('Not all explicit files were found.')
        # Work on a private, order-preserving copy.  This accepts a list,
        # set or dict of names and leaves the configuration untouched
        # (calling `.pop(name, False)` on a list raises TypeError).
        it = only_explicit(it, dict.fromkeys(self._explicit), self._strict)

    if self._dedup:  # see: only_latest
        def latest_retries(names):
            # Must consume the filtered chain built so far.  Iterating
            # `self` here would re-enter __iter__ and recurse forever.
            best = {}
            for f in names:
                stem, ext = os.path.splitext(f)
                try:
                    retry = int(ext[1:])
                except ValueError:
                    # No numeric retry suffix: keep the file as it is.
                    best.setdefault((f, False), None)
                    continue
                key = (stem, True)
                if key not in best or retry > best[key][0]:
                    # Remember the original spelling of the name so that
                    # e.g. zero-padded suffixes are returned unchanged.
                    best[key] = (retry, f)
            for (name, _), entry in best.items():
                yield name if entry is None else entry[1]
        it = latest_retries(it)

    if self._single_dataset:  # see: only_single_dataset
        def same_dataset(names):
            dataset = None
            for f in names:
                # The dataset is identified by the first three
                # dot-separated fields of the name.
                ds = '.'.join(f.split('.')[0:3])
                if dataset is None:
                    dataset = ds
                if ds != dataset:
                    raise FilterError(
                        "Files found from more than one dataset: '{}' != '{}'"
                        .format(ds, dataset))
                yield f
        it = same_dataset(it)

    return map(backend.wrap, it)
403 

◆ _with_lumi_blocks_from_ara()

def python.DiskUtils.FileSet._with_lumi_blocks_from_ara (   self)
private

Definition at line 482 of file DiskUtils.py.

def _with_lumi_blocks_from_ara(self):
    """ Pair every file of this set with its luminosity blocks.

    Each file is handed to `get_lumi_blocks`.  Returns a generator of
    `(file, set_of_lumi_blocks)` tuples.

    If `get_lumi_blocks` raises AccessError the error propagates in strict
    mode; otherwise the file is recorded in `self.broken` and skipped.
    """
    def pairs(fileset):
        for path in fileset:
            try:
                blocks = get_lumi_blocks(path)
            except AccessError:
                if not fileset._strict:
                    # Best effort: remember the unreadable file, move on.
                    fileset.broken.append(path)
                    continue
                raise
            yield path, set(blocks)
    return pairs(self)

◆ _with_lumi_blocks_from_map()

def python.DiskUtils.FileSet._with_lumi_blocks_from_map (   self,
  map_file 
)
private

Definition at line 462 of file DiskUtils.py.

def _with_lumi_blocks_from_map(self, map_file):
    """ Pair every file of this set with luminosity blocks from a map file.

    map_file -- path of a text file with one entry per line:
                `<file name> <lb>,<lb>,...` (whitespace separated; only
                the first two fields are used).  Blank lines are ignored.

    The map is read immediately and merged into `self.lb_map`.  Returns a
    generator of `(file, set_of_lumi_blocks)` tuples, where the lookup key
    is the base name of each file.

    Raises KeyError (during iteration) in strict mode when a file has no
    entry in the map; otherwise such files are appended to `self.broken`
    and skipped.
    """
    with open(map_file) as mf:
        for line in mf:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines in the map file
            fname, lb_list = fields[0], fields[1]
            self.lb_map[fname] = {int(lb) for lb in lb_list.split(',')}

    def generator(s):
        for f in s:
            try:
                yield f, s.lb_map[os.path.basename(f)]
            except KeyError:
                if s._strict:
                    raise
                else:
                    s.broken.append(f)
    return generator(self)
481 

◆ excluding()

def python.DiskUtils.FileSet.excluding (   self,
  pattern 
)
Skip filenames matching the provided regular expression. 

Definition at line 422 of file DiskUtils.py.

def excluding(self, pattern):
    """ Reject file names in which the regular expression `pattern` is found.

    A false `pattern` (e.g. None or '') removes the exclusion filter.
    Returns `self` so that calls can be chained.
    """
    if pattern:
        self._black_pattern = re.compile(pattern)
    else:
        self._black_pattern = None
    return self
426 

◆ from_directory()

def python.DiskUtils.FileSet.from_directory (   cls,
  path,
  backend = None 
)

Definition at line 309 of file DiskUtils.py.

def from_directory(cls, path, backend=None):
    """ Build a file set from the children of directory `path`.

    The listing comes from `children(path)` of the given backend; a
    `Local()` backend is used when `backend` is not supplied (or false).
    """
    storage = backend if backend else Local()
    listing = storage.children(path)
    return cls(listing, storage)
312 

◆ from_ds_info()

def python.DiskUtils.FileSet.from_ds_info (   cls,
  run,
  project,
  stream,
  base,
  backend = None 
)

Definition at line 326 of file DiskUtils.py.

def from_ds_info(cls, run, project, stream, base, backend=None):
    """ Build a file set from the directory `<base>/<project>/<stream>/<run>`.

    `run` is converted with `int()` and zero-padded to eight digits.  The
    resulting directory is handed to `cls.from_directory`.
    """
    run_dir = '{:08d}'.format(int(run))
    location = os.path.join(base, project, stream, run_dir)
    return cls.from_directory(location, backend=backend)
330 

◆ from_file_containing_list()

def python.DiskUtils.FileSet.from_file_containing_list (   cls,
  path,
  backend = None 
)

Definition at line 314 of file DiskUtils.py.

def from_file_containing_list(cls, path, backend=None):
    """ Build a file set from a local text file listing one file per line.

    Surrounding whitespace is stripped from every line.  Blank lines are
    skipped so that they do not turn into empty file names.  A `Local()`
    backend is used when `backend` is not supplied (or false).
    """
    with open(path) as lf:
        stripped = (line.strip() for line in lf)
        # Materialise the names before the file is closed.
        names = [name for name in stripped if name]
    return cls(iter(names), backend or Local())
319 

◆ from_glob()

def python.DiskUtils.FileSet.from_glob (   cls,
  pattern,
  backend = None 
)

Definition at line 321 of file DiskUtils.py.

def from_glob(cls, pattern, backend=None):
    """ Build a file set from the names the backend's `glob(pattern)` returns.

    A `Local()` backend is used when `backend` is not supplied (or false).
    """
    storage = backend if backend else Local()
    matches = storage.glob(pattern)
    return cls(matches, storage)
324 

◆ from_input()

def python.DiskUtils.FileSet.from_input (   cls,
  input_string,
  backend = None 
)
Guess what kind of input file specification was provided. 

Definition at line 332 of file DiskUtils.py.

def from_input(cls, input_string, backend=None):
    """ Guess what kind of input file specification was provided.

    The checks are tried in this order:
      1. a directory on the backend           -> `from_directory`
      2. a local file whose name does not end in `.root` (optionally
         followed by two more characters)     -> `from_file_containing_list`
      3. a file on the backend                -> `from_single_file`
      4. a string containing `*`, `?` or `[`  -> `from_glob`

    Raises AccessError when none of them applies.
    """
    be = backend or Local()

    if be.is_directory(input_string):
        return cls.from_directory(input_string, be)

    # Names like `x.root` or `x.root.1` are treated as data files rather
    # than as text files listing other files.
    looks_like_root = (input_string.endswith('.root')
                       or input_string[-7:-2] == '.root')
    if Local().is_file(input_string) and not looks_like_root:
        return cls.from_file_containing_list(input_string, be)

    if be.is_file(input_string):
        return cls.from_single_file(input_string, be)

    if any(char in input_string for char in '*?['):
        return cls.from_glob(input_string, be)

    raise AccessError('Unable to resolve input: ' + repr(input_string))
347 

◆ from_single_file()

def python.DiskUtils.FileSet.from_single_file (   cls,
  path,
  backend = None 
)

Definition at line 305 of file DiskUtils.py.

def from_single_file(cls, path, backend=None):
    """ Build a file set that contains exactly the one file `path`.

    A `Local()` backend is used when `backend` is not supplied (or false).
    """
    storage = backend or Local()
    return cls(iter((path,)), storage)
307 

◆ matching()

def python.DiskUtils.FileSet.matching (   self,
  pattern 
)
Only accept filenames matching the provided regular expression. 

Definition at line 417 of file DiskUtils.py.

def matching(self, pattern):
    """ Accept only file names in which the regular expression `pattern`
    is found.

    A false `pattern` (e.g. None or '') removes the filter.  Returns
    `self` so that calls can be chained.
    """
    if pattern:
        self._white_pattern = re.compile(pattern)
    else:
        self._white_pattern = None
    return self
421 

◆ only_existing()

def python.DiskUtils.FileSet.only_existing (   self,
  setting = True 
)
Only use existing files. 

Definition at line 436 of file DiskUtils.py.

def only_existing(self, setting=True):
    """ Switch the existence check on (default) or off.

    The value is stored in `self._existing`, which `__iter__` consults to
    decide whether files are checked with the backend's `exists`.
    Returns `self` so that calls can be chained.
    """
    self._existing = setting
    return self
440 

◆ only_latest()

def python.DiskUtils.FileSet.only_latest (   self,
  setting = True 
)
Keep only the latest retry from sets like `*.1`, `*.2`. 

Definition at line 441 of file DiskUtils.py.

def only_latest(self, setting=True):
    """ Switch on (default) or off the reduction of retry sets such as
    `*.1`, `*.2` to their latest member.

    The value is stored in `self._dedup`, which `__iter__` consults.
    Returns `self` so that calls can be chained.
    """
    self._dedup = setting
    return self
445 

◆ only_single_dataset()

def python.DiskUtils.FileSet.only_single_dataset (   self,
  setting = True 
)
Require all files to be from the same dataset. 

Definition at line 446 of file DiskUtils.py.

def only_single_dataset(self, setting=True):
    """ Switch on (default) or off the requirement that all files belong
    to the same dataset.

    The value is stored in `self._single_dataset`, which `__iter__`
    consults.  Returns `self` so that calls can be chained.
    """
    self._single_dataset = setting
    return self
450 

◆ strict_mode()

def python.DiskUtils.FileSet.strict_mode (   self,
  setting = True 
)
When strict, errors are raised in the following cases (which
otherwise cause the corresponding files to be silently skipped):

  * When LB info is requested but cannot be found for a file (because
    it was not in the map file, or we couldn't open the ROOT file).
  * When `only_existing` is set and a file is missing.
  * When a file list is provided and not all of the files it mentions
    were encountered by the end of iteration.

Definition at line 404 of file DiskUtils.py.

def strict_mode(self, setting=True):
    """ Switch strict mode on (default) or off.

    In strict mode the following situations raise an error; otherwise the
    affected files are silently skipped:

      * LB info is requested but cannot be found for a file (it is not in
        the map file, or the ROOT file could not be opened).
      * `only_existing` is set and a file is missing.
      * A file list is provided and not all of the files it mentions were
        encountered by the end of iteration.

    Returns `self` so that calls can be chained.
    """
    self._strict = setting
    return self
416 

◆ use_files_from()

def python.DiskUtils.FileSet.use_files_from (   self,
  path 
)
Use specific filenames from within the provided dataset. 

Definition at line 427 of file DiskUtils.py.

def use_files_from(self, path):
    """ Use specific filenames from within the provided dataset.

    path -- local text file with one file name per line.  Surrounding
            whitespace is stripped and blank lines are ignored.  A false
            `path` removes the restriction again.

    The names are stored in `self._explicit` as a dict (name -> True)
    because `__iter__` ticks names off with `_explicit.pop(name, False)`;
    a list does not support that call.  Returns `self` so that calls can
    be chained.
    """
    if path:
        with open(path) as lf:
            names = (line.strip() for line in lf)
            self._explicit = {name: True for name in names if name}
    else:
        self._explicit = None
    return self
435 

◆ with_lumi_blocks()

def python.DiskUtils.FileSet.with_lumi_blocks (   self,
  map_file = None 
)
Look up the luminosity blocks contained in each file.
If a map file is provided, it will be queried for the LB mapping;
otherwise each file will be opened and accessed using AthenaROOTAccess,
which can be a little slow.

Definition at line 451 of file DiskUtils.py.

def with_lumi_blocks(self, map_file=None):
    """ Look up the luminosity blocks contained in each file.

    With a (true) `map_file` the mapping is taken from that file via
    `_with_lumi_blocks_from_map`.  Without one, the work is delegated to
    `_with_lumi_blocks_from_ara`, which opens each file using
    AthenaROOTAccess and can be a little slow.

    Returns whatever the selected helper returns.
    """
    if not map_file:
        return self._with_lumi_blocks_from_ara()
    return self._with_lumi_blocks_from_map(map_file)
461 

Member Data Documentation

◆ _black_pattern

python.DiskUtils.FileSet._black_pattern
private

Definition at line 296 of file DiskUtils.py.

◆ _dedup

python.DiskUtils.FileSet._dedup
private

Definition at line 299 of file DiskUtils.py.

◆ _existing

python.DiskUtils.FileSet._existing
private

Definition at line 294 of file DiskUtils.py.

◆ _explicit

python.DiskUtils.FileSet._explicit
private

Definition at line 298 of file DiskUtils.py.

◆ _iter

python.DiskUtils.FileSet._iter
private

Definition at line 293 of file DiskUtils.py.

◆ _single_dataset

python.DiskUtils.FileSet._single_dataset
private

Definition at line 300 of file DiskUtils.py.

◆ _strict

python.DiskUtils.FileSet._strict
private

Definition at line 297 of file DiskUtils.py.

◆ _white_pattern

python.DiskUtils.FileSet._white_pattern
private

Definition at line 295 of file DiskUtils.py.

◆ backend

python.DiskUtils.FileSet.backend

Definition at line 292 of file DiskUtils.py.

◆ broken

python.DiskUtils.FileSet.broken

Definition at line 301 of file DiskUtils.py.

◆ lb_map

python.DiskUtils.FileSet.lb_map

Definition at line 302 of file DiskUtils.py.


The documentation for this class was generated from the following file: DiskUtils.py
createLinkingScheme.iter
iter
Definition: createLinkingScheme.py:62
vtune_athena.format
format
Definition: vtune_athena.py:14
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
python.Bindings.__iter__
__iter__
Definition: Control/AthenaPython/python/Bindings.py:794
CaloClusterListBadChannel.cls
cls
Definition: CaloClusterListBadChannel.py:8
search
void search(TDirectory *td, const std::string &s, std::string cwd, node *n)
recursive directory search for TH1 and TH2 and TProfiles
Definition: hcg.cxx:738
covarianceTool.filter
filter
Definition: covarianceTool.py:514
python.getCurrentFolderTag.fn
fn
Definition: getCurrentFolderTag.py:79
PyAthena::repr
std::string repr(PyObject *o)
returns the string representation of a python object equivalent of calling repr(o) in python
Definition: PyAthenaUtils.cxx:106
python.DiskUtils.get_lumi_blocks
def get_lumi_blocks(root_file)
Definition: DiskUtils.py:142
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.processes.powheg.ZZ.ZZ.__init__
def __init__(self, base_directory, **kwargs)
Constructor: all process options are set here.
Definition: ZZ.py:18
Trk::open
@ open
Definition: BinningType.h:40
mc.generator
generator
Configure Herwig7 These are the commands corresponding to what would go into the regular Herwig infil...
Definition: mc.MGH7_FxFx_H71-DEFAULT_test.py:18
python.CaloAddPedShiftConfig.int
int
Definition: CaloAddPedShiftConfig.py:45
str
Definition: BTagTrackIpAccessor.cxx:11
python.dummyaccess.exists
def exists(filename)
Definition: dummyaccess.py:9
Trk::split
@ split
Definition: LayerMaterialProperties.h:38