ATLAS Offline Software
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | List of all members
python.DiskUtils.FileSet Class Reference
Collaboration diagram for python.DiskUtils.FileSet:

Public Member Functions

def __init__ (self, iterator, backend)
 
def from_single_file (cls, path, backend=None)
 
def from_directory (cls, path, backend=None)
 
def from_file_containing_list (cls, path, backend=None)
 
def from_glob (cls, pattern, backend=None)
 
def from_ds_info (cls, run, project, stream, base, backend=None)
 
def from_input (cls, input_string, backend=None)
 
def __iter__ (self)
 
def strict_mode (self, setting=True)
 
def matching (self, pattern)
 
def excluding (self, pattern)
 
def use_files_from (self, path)
 
def only_existing (self, setting=True)
 
def only_latest (self, setting=True)
 
def only_single_dataset (self, setting=True)
 
def with_lumi_blocks (self, map_file=None)
 

Public Attributes

 backend
 
 broken
 
 lb_map
 

Private Member Functions

def _with_lumi_blocks_from_map (self, map_file)
 
def _with_lumi_blocks_from_ara (self)
 

Private Attributes

 _iter
 
 _existing
 
 _white_pattern
 
 _black_pattern
 
 _strict
 
 _explicit
 
 _dedup
 
 _single_dataset
 

Detailed Description

Represents a list of input files.
This class abstracts over the different ways files can be specified, and
the different storage backends/protocols on which they reside. It is
iterable, and provides chainable methods for filtering the file set. E.g.:

    fs = FileSet.from_input('/eos/atlas/path/to/dataset/')
    for f in fs.matching(r'.*AOD.*').only_existing():
        print(f)

Definition at line 252 of file DiskUtils.py.

Constructor & Destructor Documentation

◆ __init__()

def python.DiskUtils.FileSet.__init__ (   self,
  iterator,
  backend 
)

Definition at line 263 of file DiskUtils.py.

def __init__(self, iterator, backend):
    """Create a file set over `iterator`, accessed through `backend`.

    Every filter starts out disabled and strict mode starts out enabled.
    """
    # Source of file names and the backend object used to query/wrap them.
    self._iter = iterator
    self.backend = backend
    # Files skipped during a non-strict luminosity-block lookup, and the
    # file-name -> luminosity-block mapping loaded from a map file.
    self.broken = []
    self.lb_map = {}
    # Filter configuration; changed through the chainable setter methods.
    self._strict = True
    self._existing = False
    self._dedup = False
    self._single_dataset = False
    self._white_pattern = None
    self._black_pattern = None
    self._explicit = None
275 

Member Function Documentation

◆ __iter__()

def python.DiskUtils.FileSet.__iter__ (   self)

Definition at line 320 of file DiskUtils.py.

def __iter__(self):
    """Return an iterator over the file names that survive all filters.

    The stages are chained in this order: `matching`, `excluding`,
    `only_existing`, `use_files_from`, `only_latest`,
    `only_single_dataset`.  Each surviving name is finally passed through
    `self.backend.wrap`.

    Most stages are lazy, so their errors surface while the result is
    being consumed.  The `only_latest` stage is the exception: it has to
    see every name before it can pick the highest retry, so the upstream
    stages are drained as soon as `__iter__` is called.

    Raises:
        AccessError: strict mode with `only_existing` set and a file that
            the backend reports as missing.
        FilterError: strict mode and some names given to `use_files_from`
            were never encountered; or `only_single_dataset` is set and a
            file from a second dataset is seen (independent of strict
            mode).
    """
    it = self._iter
    if self._white_pattern:
        it = filter(lambda x: self._white_pattern.search(x), it)
    if self._black_pattern:
        it = filter(lambda x: not self._black_pattern.search(x), it)

    if self._existing:  # see: only_existing
        if self._strict:
            def require_existing(names, backend):
                for f in names:
                    if not backend.exists(f):
                        raise AccessError('File not found: ' + f)
                    yield f
            it = require_existing(it, self.backend)
        else:
            it = filter(lambda x: self.backend.exists(x), it)

    if self._explicit is not None:  # see: use_files_from
        if not isinstance(self._explicit, dict):
            # `pop(name, default)` below needs a mapping; on a plain list
            # it raises TypeError.  Normalise once to {name: True}.
            self._explicit = dict.fromkeys(self._explicit, True)

        def explicit_only(names, strict):
            for f in names:
                name = os.path.basename(f)
                # Each requested name is consumed the first time it is seen.
                if self._explicit.pop(name, False):
                    yield f
            if strict and self._explicit:
                for f in self._explicit:
                    print('Missing:', f)
                raise FilterError('Not all explicit files were found.')
        it = explicit_only(it, self._strict)

    if self._dedup:  # see: only_latest
        def keep_highest(m, f):
            # Split 'name.N' into the stem and the numeric retry suffix.
            name, ext = os.path.splitext(f)
            if name in m:
                m[name] = str(max(int(m[name]), int(ext[1:])))
            else:
                m[name] = ext[1:]
            return m

        def rebuild(em):
            for name, ext in em.items():
                yield '.'.join([name, ext])

        # Reduce over the filtered stream `it`.  Reducing over `self`
        # would call __iter__ again and recurse without end.
        it = rebuild(functools.reduce(keep_highest, it, {}))

    if self._single_dataset:  # see: only_single_dataset
        def same_dataset(names):
            dataset = None
            for f in names:
                # The first three dot-separated fields identify the dataset.
                ds = '.'.join(f.split('.')[0:3])
                if dataset is None:
                    dataset = ds
                if ds != dataset:
                    raise FilterError(
                        "Files found from more than one dataset: '{}' != '{}'"
                        .format(ds, dataset))
                yield f
        it = same_dataset(it)

    it = map(lambda x: self.backend.wrap(x), it)
    return it
375 

◆ _with_lumi_blocks_from_ara()

def python.DiskUtils.FileSet._with_lumi_blocks_from_ara (   self)
private

Definition at line 454 of file DiskUtils.py.

def _with_lumi_blocks_from_ara(self):
    """Yield `(file, set_of_lumi_blocks)` pairs using `get_lumi_blocks`.

    When `get_lumi_blocks` raises AccessError for a file, the error is
    re-raised in strict mode; otherwise the file is recorded in
    `self.broken` and skipped.
    """
    def pairs(fileset):
        for path in fileset:
            try:
                blocks = get_lumi_blocks(path)
            except AccessError:
                if not fileset._strict:
                    # Best-effort mode: remember the failure and move on.
                    fileset.broken.append(path)
                    continue
                raise
            yield path, set(blocks)
    return pairs(self)

◆ _with_lumi_blocks_from_map()

def python.DiskUtils.FileSet._with_lumi_blocks_from_map (   self,
  map_file 
)
private

Definition at line 434 of file DiskUtils.py.

def _with_lumi_blocks_from_map(self, map_file):
    """Yield `(file, set_of_lumi_blocks)` pairs looked up in `map_file`.

    Each non-blank line of `map_file` holds a file name followed by
    whitespace and a comma-separated list of integer luminosity blocks.
    The whole file is read into `self.lb_map` before the generator is
    returned.  Files are then looked up by their base name.

    A file missing from the map raises KeyError in strict mode; otherwise
    it is recorded in `self.broken` and skipped.
    """
    with open(map_file) as mf:
        for line in mf:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no mapping.
                continue
            fname = fields[0]
            self.lb_map[fname] = {int(lb) for lb in fields[1].split(',')}

    def pairs(fileset):
        for f in fileset:
            try:
                yield f, fileset.lb_map[os.path.basename(f)]
            except KeyError:
                if fileset._strict:
                    raise
                fileset.broken.append(f)
    return pairs(self)
453 

◆ excluding()

def python.DiskUtils.FileSet.excluding (   self,
  pattern 
)
Skip filenames matching the provided regular expression. 

Definition at line 394 of file DiskUtils.py.

def excluding(self, pattern):
    ''' Drop file names in which the regular expression `pattern` is found;
    a false `pattern` removes the filter. Returns `self` for chaining. '''
    if pattern:
        self._black_pattern = re.compile(pattern)
    else:
        self._black_pattern = None
    return self
398 

◆ from_directory()

def python.DiskUtils.FileSet.from_directory (   cls,
  path,
  backend = None 
)

Definition at line 281 of file DiskUtils.py.

def from_directory(cls, path, backend=None):
    ''' Build a file set from what the backend lists as children of `path`.
    A `Local()` backend is used when none is supplied. '''
    if not backend:
        backend = Local()
    return cls(backend.children(path), backend)
284 

◆ from_ds_info()

def python.DiskUtils.FileSet.from_ds_info (   cls,
  run,
  project,
  stream,
  base,
  backend = None 
)

Definition at line 298 of file DiskUtils.py.

def from_ds_info(cls, run, project, stream, base, backend=None):
    ''' Build a file set from the directory `base/project/stream/<run>`,
    where the run number is zero-padded to eight digits. '''
    run_dir = '{:0{digits}d}'.format(int(run), digits=8)
    dataset_dir = os.path.join(base, project, stream, run_dir)
    return cls.from_directory(dataset_dir, backend=backend)
302 

◆ from_file_containing_list()

def python.DiskUtils.FileSet.from_file_containing_list (   cls,
  path,
  backend = None 
)

Definition at line 286 of file DiskUtils.py.

def from_file_containing_list(cls, path, backend=None):
    ''' Build a file set from a local text file listing one file name per
    line; surrounding whitespace is stripped from every line. '''
    with open(path) as list_file:
        names = [entry.strip() for entry in list_file]
    return cls(iter(names), backend or Local())
291 

◆ from_glob()

def python.DiskUtils.FileSet.from_glob (   cls,
  pattern,
  backend = None 
)

Definition at line 293 of file DiskUtils.py.

def from_glob(cls, pattern, backend=None):
    ''' Build a file set from the backend's expansion of glob `pattern`.
    A `Local()` backend is used when none is supplied. '''
    if not backend:
        backend = Local()
    return cls(backend.glob(pattern), backend)
296 

◆ from_input()

def python.DiskUtils.FileSet.from_input (   cls,
  input_string,
  backend = None 
)
Guess what kind of input file specification was provided. 

Definition at line 304 of file DiskUtils.py.

def from_input(cls, input_string, backend=None):
    ''' Work out which kind of input specification `input_string` is and
    build the file set accordingly.

    Checked in order: a directory on the backend; a local file whose name
    does not look like a ROOT file (treated as a list of file names); a
    single file on the backend; a glob pattern.  Anything else raises
    AccessError. '''
    be = backend or Local()

    if be.is_directory(input_string):
        return cls.from_directory(input_string, be)

    # True for '*.root' and for '*.root' followed by two more characters.
    looks_like_root = (input_string.endswith('.root')
                       or input_string[-7:-2] == '.root')
    if Local().is_file(input_string) and not looks_like_root:
        return cls.from_file_containing_list(input_string, be)

    if be.is_file(input_string):
        return cls.from_single_file(input_string, be)

    if any(wildcard in input_string for wildcard in '*?['):
        return cls.from_glob(input_string, be)

    raise AccessError('Unable to resolve input: ' + repr(input_string))
319 

◆ from_single_file()

def python.DiskUtils.FileSet.from_single_file (   cls,
  path,
  backend = None 
)

Definition at line 277 of file DiskUtils.py.

def from_single_file(cls, path, backend=None):
    ''' Build a file set containing only `path`. A `Local()` backend is
    used when none is supplied. '''
    only_entry = iter([path])
    return cls(only_entry, backend or Local())
279 

◆ matching()

def python.DiskUtils.FileSet.matching (   self,
  pattern 
)
Only accept filenames matching the provided regular expression. 

Definition at line 389 of file DiskUtils.py.

def matching(self, pattern):
    ''' Keep only file names in which the regular expression `pattern` is
    found; a false `pattern` removes the filter. Returns `self` for
    chaining. '''
    if pattern:
        self._white_pattern = re.compile(pattern)
    else:
        self._white_pattern = None
    return self
393 

◆ only_existing()

def python.DiskUtils.FileSet.only_existing (   self,
  setting = True 
)
Only use existing files. 

Definition at line 408 of file DiskUtils.py.

def only_existing(self, setting=True):
    ''' Switch the existence filter on or off: when on, iteration checks
    every file with the backend's `exists`. Returns `self` for chaining. '''
    self._existing = setting
    return self
412 

◆ only_latest()

def python.DiskUtils.FileSet.only_latest (   self,
  setting = True 
)
Keep only the latest retry from sets like `*.1`, `*.2`. 

Definition at line 413 of file DiskUtils.py.

def only_latest(self, setting=True):
    ''' Switch retry de-duplication on or off: when on, only the highest
    numbered file of sets like `*.1`, `*.2` is kept. Returns `self` for
    chaining. '''
    self._dedup = setting
    return self
417 

◆ only_single_dataset()

def python.DiskUtils.FileSet.only_single_dataset (   self,
  setting = True 
)
Require all files to be from the same dataset. 

Definition at line 418 of file DiskUtils.py.

def only_single_dataset(self, setting=True):
    ''' Switch the single-dataset check on or off: when on, iteration
    fails if files from more than one dataset are seen. Returns `self`
    for chaining. '''
    self._single_dataset = setting
    return self
422 

◆ strict_mode()

def python.DiskUtils.FileSet.strict_mode (   self,
  setting = True 
)
When strict, errors are raised in the following cases (which
otherwise cause the corresponding files to be silently skipped):

  * When LB info is requested but cannot be found for a file (because
    it was not in the map file, or we couldn't open the ROOT file).
  * When `only_existing` is set and a file is missing.
  * When a file list is provided and not all of the files it mentions
    were encountered by the end of iteration.

Definition at line 376 of file DiskUtils.py.

def strict_mode(self, setting=True):
    """ Switch strict mode on or off and return `self` for chaining.

    In strict mode the following situations raise an error; in
    non-strict mode the affected files are skipped instead:

      * Luminosity-block information was requested but is unavailable
        for a file (absent from the map file, or the ROOT file could not
        be opened).
      * `only_existing` is set and a file does not exist.
      * A list of explicit files was given and some of them had not been
        encountered when iteration finished.
    """
    self._strict = setting
    return self
388 

◆ use_files_from()

def python.DiskUtils.FileSet.use_files_from (   self,
  path 
)
Use specific filenames from within the provided dataset. 

Definition at line 399 of file DiskUtils.py.

def use_files_from(self, path):
    ''' Restrict the file set to the file names listed in the text file
    `path` (one name per line); a false `path` removes the restriction.
    Returns `self` for chaining.

    The names are stored as a `{name: True}` mapping because iteration
    consumes them with `pop(name, False)`, which a list does not support.
    Blank lines are ignored; an empty name could never be found and would
    make strict mode fail unconditionally. '''
    if path:
        with open(path) as lf:
            stripped = (line.strip() for line in lf)
            self._explicit = {name: True for name in stripped if name}
    else:
        self._explicit = None
    return self
407 

◆ with_lumi_blocks()

def python.DiskUtils.FileSet.with_lumi_blocks (   self,
  map_file = None 
)
Look up the luminosity blocks contained in each file.
If a map file is provided it will be queried for the LB mapping,
otherwise each file will be opened and accessed using AthenaROOTAccess
which can be a little slow.

Definition at line 423 of file DiskUtils.py.

def with_lumi_blocks(self, map_file=None):
    """ Pair every file with the luminosity blocks it contains.

    With a `map_file`, the mapping is read from that file.  Without one,
    each file is opened and inspected through AthenaROOTAccess, which can
    be a little slow.
    """
    if not map_file:
        return self._with_lumi_blocks_from_ara()
    return self._with_lumi_blocks_from_map(map_file)
433 

Member Data Documentation

◆ _black_pattern

python.DiskUtils.FileSet._black_pattern
private

Definition at line 268 of file DiskUtils.py.

◆ _dedup

python.DiskUtils.FileSet._dedup
private

Definition at line 271 of file DiskUtils.py.

◆ _existing

python.DiskUtils.FileSet._existing
private

Definition at line 266 of file DiskUtils.py.

◆ _explicit

python.DiskUtils.FileSet._explicit
private

Definition at line 270 of file DiskUtils.py.

◆ _iter

python.DiskUtils.FileSet._iter
private

Definition at line 265 of file DiskUtils.py.

◆ _single_dataset

python.DiskUtils.FileSet._single_dataset
private

Definition at line 272 of file DiskUtils.py.

◆ _strict

python.DiskUtils.FileSet._strict
private

Definition at line 269 of file DiskUtils.py.

◆ _white_pattern

python.DiskUtils.FileSet._white_pattern
private

Definition at line 267 of file DiskUtils.py.

◆ backend

python.DiskUtils.FileSet.backend

Definition at line 264 of file DiskUtils.py.

◆ broken

python.DiskUtils.FileSet.broken

Definition at line 273 of file DiskUtils.py.

◆ lb_map

python.DiskUtils.FileSet.lb_map

Definition at line 274 of file DiskUtils.py.


The documentation for this class was generated from the following file:
createLinkingScheme.iter
iter
Definition: createLinkingScheme.py:62
vtune_athena.format
format
Definition: vtune_athena.py:14
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
python.Bindings.__iter__
__iter__
Definition: Control/AthenaPython/python/Bindings.py:794
CaloClusterListBadChannel.cls
cls
Definition: CaloClusterListBadChannel.py:8
python.processes.powheg.ZZj_MiNNLO.ZZj_MiNNLO.__init__
def __init__(self, base_directory, **kwargs)
Constructor: all process options are set here.
Definition: ZZj_MiNNLO.py:18
search
void search(TDirectory *td, const std::string &s, std::string cwd, node *n)
recursive directory search for TH1 and TH2 and TProfiles
Definition: hcg.cxx:738
covarianceTool.filter
filter
Definition: covarianceTool.py:514
python.getCurrentFolderTag.fn
fn
Definition: getCurrentFolderTag.py:79
PyAthena::repr
std::string repr(PyObject *o)
returns the string representation of a python object equivalent of calling repr(o) in python
Definition: PyAthenaUtils.cxx:106
python.DiskUtils.get_lumi_blocks
def get_lumi_blocks(root_file)
Definition: DiskUtils.py:142
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:26
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
Trk::open
@ open
Definition: BinningType.h:40
mc.generator
generator
Configure Herwig7 These are the commands corresponding to what would go into the regular Herwig infil...
Definition: mc.MGH7_FxFx_H71-DEFAULT_test.py:18
python.CaloAddPedShiftConfig.int
int
Definition: CaloAddPedShiftConfig.py:45
str
Definition: BTagTrackIpAccessor.cxx:11
python.dummyaccess.exists
def exists(filename)
Definition: dummyaccess.py:9
Trk::split
@ split
Definition: LayerMaterialProperties.h:38