ATLAS Offline Software
fileutil.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 import os, sys, re, time
4 from PyJobTransformsCore import dummyaccess, rfio
5 import stat as statconsts
6 
# Module docstring, assigned explicitly to __doc__.
__doc__ = """A set of file utilities that can be used for several file systems (local files, rfio, castor)"""

# Empty export list: a star-import of this module brings in no names.
__all__ = [ ]
10 
# Defaults used by the retry helpers in this module
defaultRetryMaxTime = 1.0 # seconds
defaultRetryStartTime = 0.1 # seconds

def retry_function_time( func, args, retryException,
                         retryMaxTime = defaultRetryMaxTime,
                         retryStartTime = defaultRetryStartTime ):
    """Call function several times if it throws a <retryException>.
    It will wait an increasing amount of time in between tries.
    First waiting time is <retryStartTime>, which is increased by
    a factor of 2 for each retry. It will give up and raise the
    original exception if it still fails after a total retry time of <retryMaxTime>.
    <func>: function to be called
    <args>: tuple with the function arguments, or the single function argument
    <retryException>: exception class (or tuple of classes) that triggers a retry
    <retryMaxTime>: total time in seconds after which a failure is re-raised
    <retryStartTime>: first waiting time in seconds (values <= 0 are replaced by 0.1)
    Returns whatever <func> returns. Exceptions other than <retryException>
    propagate immediately without any retry."""
    # Only an exact tuple is unpacked; anything else is passed as the single argument
    if type(args) is not tuple: args = (args,)
    retryDelay = retryStartTime
    # A zero delay would never grow when doubled, and time.sleep() rejects negative values
    if retryDelay <= 0: retryDelay = 0.1
    tStart = time.time()
    while True:
        try:
            return func( *args )
        except retryException:
            dt = time.time() - tStart
            # repr() instead of '%r' % a: the latter fails when an argument is itself a tuple
            argsStr = ', '.join( [ repr(a) for a in args ] )
            # Not every callable has a __name__ (e.g. functools.partial objects)
            funcName = getattr( func, '__name__', repr(func) )
            if dt > retryMaxTime:
                print ("%s(%s) Failed" % (funcName,argsStr))
                raise
            time.sleep(retryDelay)
            retryDelay *= 2
            print ("Retrying %s(%s)" % (funcName,argsStr))
45 
46 
def retry_file_access( func, args, retryException = OSError,
                       retryMaxTime = defaultRetryMaxTime,
                       retryStartTime = defaultRetryStartTime ):
    """Call <func> with <args> through retry_function_time, retrying on
    <retryException> (OSError by default).
    <retryMaxTime>: total time in seconds after which a failure is re-raised
    <retryStartTime>: first waiting time in seconds between tries
    Returns the value returned by <func>."""
    return retry_function_time( func, args, retryException, retryMaxTime, retryStartTime )
52 
class AccessType:
    """Description of one way of accessing files.

    <name>: short label of the access type
    <matchPattern>: regular expression that identifies file names of this type
    <replaceWith>: replacement applied to the matched part by cleanUp()
    <baseModule>, <pathModule>: modules stored for the file operations of this type"""
    def __init__(self, name, matchPattern, replaceWith, baseModule, pathModule):
        self.name = name
        self.matchPat, self.replacePat = matchPattern, replaceWith
        self.baseModule, self.pathModule = baseModule, pathModule

    def matches(self,filename):
        """Return True if the match pattern is found in <filename>, False otherwise."""
        found = re.search( self.matchPat, filename )
        return found is not None

    def cleanUp(self,filename):
        """Return <filename> with the part matching the pattern replaced by the replacement."""
        pattern = re.compile( self.matchPat )
        return pattern.sub( self.replacePat, filename )
68 
69 
# Supported access types.
# AccessType arguments, in order: name, regex identifying a file name of this type,
# replacement used by AccessType.cleanUp(), module used for listdir/stat/lstat/access,
# module used for exists/getsize/getmtime.
IO_LOCAL = AccessType('local' , r'(.*)' , r'\1' , os , os.path)
IO_RFIO = AccessType('rfio' , r'^rfio:' , r'rfio:' , rfio, rfio)
IO_CASTOR = AccessType('castor', r'^(?:rfio:)?/castor/', 'rfio:/castor/', rfio, rfio)
IO_XROOTD = AccessType('xrootd', r'^root:' , r'root:' , dummyaccess, dummyaccess )
IO_LFN = AccessType('lfn' , r'^LFN:' , r'LFN:' , dummyaccess, dummyaccess )
# Search order for get_access_type(): the first entry whose pattern matches is used.
# IO_CASTOR is listed before IO_RFIO, so names starting with 'rfio:/castor/' (which
# match both patterns) resolve to IO_CASTOR.
_accessTypes = ( IO_LFN, IO_XROOTD, IO_CASTOR, IO_RFIO, IO_LOCAL ) # IO_LOCAL should be last entry due to regex being very liberal
77 
78 
class Tee:
    """A file utility like unix 'tee'. It writes any output to a file and to screen (stdout by default).
    <filename>: name of the log file to write to
    <options>: if it has an 'a', append to the log file, otherwise overwrite an existing file.
    <screen>: file-like object that receives a copy of everything written
    The object can be used as a context manager, which closes the log file on exit."""
    def __init__(self,filename,options='',screen=sys.stdout):
        fileMode = 'a' if 'a' in options else 'w'
        self.f = open (filename,fileMode)
        self.screen = screen

    #
    # override file functions
    #
    def write(self,s):
        """Write string <s> to the screen and to the log file."""
        self.screen.write(s)
        self.f.write(s)

    def writelines(self,ls):
        """Write all strings in <ls> to the screen and to the log file."""
        # Materialise first: a one-shot iterator would otherwise be used up by the screen
        lines = list(ls)
        self.screen.writelines(lines)
        self.f.writelines(lines)

    def flush(self):
        """Flush both the screen and the log file."""
        self.screen.flush()
        self.f.flush()

    def close(self):
        """Close the log file. The screen stream is not closed."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.close()
        return False # never suppress exceptions
106 
107 
108 
def get_access_type(filename):
    """Return the first entry of _accessTypes whose pattern matches <filename>,
    or IO_LOCAL if none of them matches."""
    candidates = ( accessType for accessType in _accessTypes if accessType.matches( filename ) )
    return next( candidates, IO_LOCAL )
113 
114 
def exists(filename):
    """Return the result of exists() from the path module of the access type
    of <filename>. The call is made through retry_file_access."""
    pathModule = get_access_type(filename).pathModule
    return retry_file_access( pathModule.exists, filename )
118 
119 
def getsize(filename):
    """Return the result of getsize() from the path module of the access type
    of <filename>. The call is made through retry_file_access."""
    pathModule = get_access_type(filename).pathModule
    return retry_file_access( pathModule.getsize, filename )
123 
124 
def getmtime(filename):
    """Return the result of getmtime() from the path module of the access type
    of <filename>. The call is made through retry_file_access."""
    pathModule = get_access_type(filename).pathModule
    return retry_file_access( pathModule.getmtime, filename )
128 
129 
def listdir(filename):
    """Return the result of listdir() from the base module of the access type
    of <filename>. The call is made through retry_file_access."""
    baseModule = get_access_type(filename).baseModule
    return retry_file_access( baseModule.listdir, filename )
133 
134 
def stat(filename):
    """Return the result of stat() from the base module of the access type
    of <filename>. The call is made through retry_file_access."""
    baseModule = get_access_type(filename).baseModule
    return retry_file_access( baseModule.stat, filename )
138 
139 
def lstat(filename):
    """Return the result of lstat() from the base module of the access type
    of <filename>. The call is made through retry_file_access."""
    baseModule = get_access_type(filename).baseModule
    return retry_file_access( baseModule.lstat, filename )
143 
144 
def access(filename,mode):
    """Return the result of access(<filename>, <mode>) from the base module of
    the access type of <filename>. The call is made through retry_file_access."""
    baseModule = get_access_type(filename).baseModule
    callArgs = (filename, mode)
    return retry_file_access( baseModule.access, callArgs )
148 
149 
def remove(filename):
    """Remove file <filename> if it exists. Only supported for local files.
    For any other access type a warning is printed and nothing is removed."""
    accessType = get_access_type(filename)
    if accessType != IO_LOCAL:
        print ("WARNING: file %s file %s can not be removed" %
               (accessType.name, filename))
        return
    if exists(filename):
        retry_file_access( os.remove, filename )
158 
159 
def exists_suffix(filename,suffixRE):
    """Test if a file exists in the same directory as <filename>, with the
    same name, but with an additional suffix given as a regular expression
    in <suffixRE>. It returns a list of all matching suffices, or an empty
    list if no matching filename+suffix was found.
    <filename> is taken literally: regex metacharacters in it (such as '.'
    or '(') have no special meaning. Each returned entry is the part of the
    found file name that follows <filename>."""
    dirname = os.path.dirname(filename) or os.curdir
    # if directory does not exist, then file will never exist (and prevent crash later on)
    if not os.path.isdir(dirname): return []
    basename = os.path.basename(filename)
    # Escape the file name so that only <suffixRE> is interpreted as a regular expression
    pat = re.compile( '^%s(%s)' % (re.escape(basename),suffixRE) )
    return [ pat.sub( r'\1', f ) for f in listdir(dirname) if pat.search( f ) ]
176 
177 
def exists_suffix_number(filename):
    """Test if a file exists in the same directory as <filename>, with the same name,
    but a non-negative integer added at the end of the name. It returns the filename
    with the highest number added, or None if no such file exists."""
    # The '$' keeps out names with extra text after the digits (e.g. <filename>12bar),
    # whose suffix could not be converted by int()
    sufs = exists_suffix(filename, r'[0-9]+$')
    if not sufs: return None
    # max() returns the first of several equal maxima, e.g. '7' before '007'
    return filename + max( sufs, key=int )
192 
193 
def mode_string(filename):
    """Return the mode string (see mode_to_string) for the st_mode reported
    by lstat() on <filename>."""
    fileMode = lstat(filename).st_mode
    return mode_to_string( fileMode )
196 
197 
def mode_to_string(mode):
    """Return the unix like string corresponding to the file access mode (rwxd etc).

    The result has 10 characters: a type character ('d' for a directory, 'l' for
    a symbolic link, '-' otherwise) followed by three characters each for user,
    group and other. A set-uid or set-gid bit is shown as 's' in the execute
    position of user or group respectively."""
    if statconsts.S_ISDIR(mode):
        typeChar = 'd'
    elif statconsts.S_ISLNK(mode):
        typeChar = 'l'
    else:
        typeChar = '-'
    # One row per class (user, group, other): read, write, special, execute bits.
    # 'other' has no special bit handled here, so 0 is used (never set in mode & 0).
    bitTable = (
        ( statconsts.S_IRUSR, statconsts.S_IWUSR, statconsts.S_ISUID, statconsts.S_IXUSR ),
        ( statconsts.S_IRGRP, statconsts.S_IWGRP, statconsts.S_ISGID, statconsts.S_IXGRP ),
        ( statconsts.S_IROTH, statconsts.S_IWOTH, 0, statconsts.S_IXOTH ),
    )
    chars = [ typeChar ]
    for readBit, writeBit, specialBit, execBit in bitTable:
        chars.append( 'r' if mode & readBit else '-' )
        chars.append( 'w' if mode & writeBit else '-' )
        if mode & specialBit:
            chars.append( 's' )
        elif mode & execBit:
            chars.append( 'x' )
        else:
            chars.append( '-' )
    return ''.join(chars)
222 
python.fileutil.exists
def exists(filename)
Definition: fileutil.py:115
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.fileutil.AccessType.matchPat
matchPat
Definition: fileutil.py:56
python.fileutil.Tee.writelines
def writelines(self, ls)
Definition: fileutil.py:98
python.fileutil.AccessType.__init__
def __init__(self, name, matchPattern, replaceWith, baseModule, pathModule)
Definition: fileutil.py:54
python.fileutil.getmtime
def getmtime(filename)
Definition: fileutil.py:125
python.fileutil.Tee.flush
def flush(self)
Definition: fileutil.py:103
python.fileutil.getsize
def getsize(filename)
Definition: fileutil.py:120
python.fileutil.lstat
def lstat(filename)
Definition: fileutil.py:140
python.fileutil.Tee.screen
screen
Definition: fileutil.py:88
python.fileutil.AccessType
Definition: fileutil.py:53
python.fileutil.remove
def remove(filename)
Definition: fileutil.py:150
python.fileutil.exists_suffix
def exists_suffix(filename, suffixRE)
Definition: fileutil.py:160
python.fileutil.Tee.f
f
Definition: fileutil.py:87
python.fileutil.listdir
def listdir(filename)
Definition: fileutil.py:130
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.fileutil.exists_suffix_number
def exists_suffix_number(filename)
Definition: fileutil.py:178
python.fileutil.AccessType.pathModule
pathModule
Definition: fileutil.py:59
python.fileutil.get_access_type
def get_access_type(filename)
Definition: fileutil.py:109
python.fileutil.Tee
Definition: fileutil.py:79
python.fileutil.retry_function_time
def retry_function_time(func, args, retryException, retryMaxTime=defaultRetryMaxTime, retryStartTime=defaultRetryMaxTime)
Definition: fileutil.py:14
python.fileutil.Tee.write
def write(self, s)
Definition: fileutil.py:93
python.fileutil.AccessType.cleanUp
def cleanUp(self, filename)
Definition: fileutil.py:66
python.fileutil.mode_to_string
def mode_to_string(mode)
Definition: fileutil.py:198
python.fileutil.AccessType.replacePat
replacePat
Definition: fileutil.py:57
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
python.fileutil.access
def access(filename, mode)
Definition: fileutil.py:145
python.fileutil.AccessType.baseModule
baseModule
Definition: fileutil.py:58
python.fileutil.AccessType.name
name
Definition: fileutil.py:55
python.fileutil.AccessType.matches
def matches(self, filename)
Definition: fileutil.py:62
python.fileutil.stat
def stat(filename)
Definition: fileutil.py:135
python.fileutil.Tee.__init__
def __init__(self, filename, options='', screen=sys.stdout)
Definition: fileutil.py:82
python.fileutil.retry_file_access
def retry_file_access(func, args, retryException=OSError, retryMaxTime=defaultRetryMaxTime, retryStartTime=defaultRetryMaxTime)
Definition: fileutil.py:47
python.fileutil.mode_string
def mode_string(filename)
Definition: fileutil.py:194