ATLAS Offline Software
trfFileUtils.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 
8 
9 import logging
10 msg = logging.getLogger(__name__)
11 
12 # @note Use the PyUtils forking decorator to ensure that ROOT is run completely within
13 # a child process and will not 'pollute' the parent python process with non-thread-safe
14 # bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
15 import PyUtils.Decorators as _decos
16 
17 from PyUtils.RootUtils import import_root
18 
19 # Use a stripped down key list, as we retrieve only 'fast' metadata
20 athFileInterestingKeys = ['file_size', 'file_guid', 'file_type', 'nentries']
21 
22 
def AthenaLiteFileInfo(filename, filetype, retrieveKeys = athFileInterestingKeys):
    """Read a reduced set of metadata for one file using MetaReader in 'lite' mode.

    @param filename Name of the file to inspect; also used as the key of the result
    @param filetype File type; in this function it is only used in the debug message
    @param retrieveKeys Iterable of metadata keys to copy into the result
    @return Dictionary ``{filename: {key: value, ...}}`` containing every requested
            key that was present (absent keys are logged as warnings and skipped;
            the 'file_type' value is lower-cased), or None if reading the metadata
            raised ValueError, AssertionError or ReferenceError
    """
    msg.debug('Calling AthenaLiteFileInfo for {0} (type {1})'.format(filename, filetype))
    from PyUtils.MetaReader import read_metadata

    metaDict = {}
    try:
        fileMeta = read_metadata(filename, None, 'lite')[filename]
        msg.debug('read_metadata came back for {0}'.format(filename))
        selected = {}
        metaDict[filename] = selected
        for wantedKey in retrieveKeys:
            msg.debug('Looking for key {0}'.format(wantedKey))
            try:
                value = fileMeta[wantedKey]
            except KeyError:
                msg.warning('Missing key in athFile info: {0}'.format(wantedKey))
                continue
            # File type strings are normalised to lower case
            if wantedKey == 'file_type':
                value = value.lower()
            selected[wantedKey] = value
    except (ValueError, AssertionError, ReferenceError):
        msg.error('Problem in getting metadata for {0}'.format(filename))
        return None

    msg.debug('Returning {0}'.format(metaDict))
    return metaDict
44 
45 
@_decos.forking
def HISTEntries(fileName):
    """Determine the number of events in a HIST file.

    The single ``run_*`` directory (other than ``run_multiple``) is located and the
    ``GLOBAL/DQTDataFlow/events_lb`` histogram(s) below it are read. For file names
    containing ``tmp.HIST_`` one histogram per sub-directory whose name contains
    ``lb`` is used; otherwise the histogram directly below the run directory is used.
    Within each histogram all non-zero bins inspected must agree; the agreed values
    of all histograms are summed.

    @param fileName Path/URL of the HIST file, opened read-only through ROOT
    @return Summed event count, or None if the file cannot be opened, the run
            directory is missing or ambiguous, a histogram cannot be retrieved,
            or the bin contents are negative or inconsistent
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')
    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # try/finally guarantees the file is closed on every exit path, including
    # the "two run_ directories" early return, which previously left it open.
    try:
        rundir = None
        for key in fname.GetListOfKeys():
            name = key.GetName()
            if name.startswith('run_') and name != 'run_multiple':
                if rundir is not None:
                    msg.warning('Found two run_ directories in HIST file %s: %s and %s', fileName, rundir, name)
                    return None
                rundir = name

        if rundir is None:
            msg.warning( 'Unable to find run directory in HIST file %s', fileName )
            return None

        msg.info( 'Using run directory %s for event counting of HIST file %s. ', rundir, fileName )

        possibleLBs = []
        if 'tmp.HIST_' in fileName:
            msg.info( 'Special case for temporary HIST file {0}. '.format( fileName ) )
            runDirObj = fname.Get('{0}'.format(rundir))
            for subDirKey in runDirObj.GetListOfKeys():
                subDirName = subDirKey.GetName()
                if 'lb' in subDirName:
                    msg.info( 'Using {0} in tmp HIST file {1}. '.format(subDirName, fileName ) )
                    possibleLBs.append(rundir+'/'+str(subDirName)+'/GLOBAL/DQTDataFlow/events_lb')
        else:
            msg.info( 'Classical case for HIST file {0}. '.format( fileName ) )
            possibleLBs.append('%s/GLOBAL/DQTDataFlow/events_lb' % rundir)

        if not possibleLBs:
            msg.warning( 'Unable to find events_lb histogram in HIST file %s', fileName )
            return None

        nev = 0
        for hpath in possibleLBs:
            h = fname.Get(hpath)
            if not isinstance( h, root.TH1 ):
                msg.warning( 'Unable to retrieve %s in HIST file %s.', hpath, fileName )
                return None

            nevLoc = 0
            # NOTE(review): range(1, nBinsX) does not visit bin nBinsX itself;
            # kept as in the original - confirm whether skipping the last bin
            # is intentional.
            for i in range(1, h.GetNbinsX()):
                binContent = h[i]
                if binContent < 0:
                    msg.warning( 'Negative number of events for step %s in HIST file %s.', h.GetXaxis().GetBinLabel(i), fileName )
                    return None
                if binContent == 0:
                    continue
                if nevLoc == 0:
                    # First non-empty bin defines the reference count
                    nevLoc = binContent
                elif nevLoc != binContent:
                    msg.warning( 'Mismatch in events per step in HIST file %s; most recent step seen is %s.', fileName, h.GetXaxis().GetBinLabel(i) )
                    return None
            nev += nevLoc

        return nev
    finally:
        fname.Close()
138 
139 
140 
141 
@_decos.forking
def NTUPEntries(fileName, treeNames):
    """Determine the number of entries in an NTUP file for the given tree name(s).

    When several trees are given they must all hold the same number of entries.

    @param fileName Path/URL of the NTUP file, opened read-only through ROOT
    @param treeNames A single tree name or a list of tree names
    @return The common number of entries, or None if the file cannot be opened,
            a tree is missing or not a TTree, an entry count is negative, the
            trees disagree, or no tree name was supplied
    """
    if not isinstance( treeNames, list ):
        treeNames = [treeNames]

    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')
    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # try/finally closes the file on the early-return paths as well
    try:
        prevNum = None
        prevTree = None

        for treeName in treeNames:
            tree = fname.Get(treeName)
            if not isinstance(tree, root.TTree):
                return None

            num = tree.GetEntriesFast()
            if not num >= 0:
                msg.warning('GetEntriesFast returned non positive value for tree %s in NTUP file %s.', treeName, fileName )
                return None

            if prevNum is not None and prevNum != num:
                msg.warning( "Found diffferent number of entries in tree %s and tree %s of file %s.", treeName, prevTree, fileName )
                return None

            # Remember this tree's count so the next tree is compared against it
            # (previously prevNum was never updated, so the check could not fire).
            prevNum = num
            prevTree = treeName

        # prevNum is still None when treeNames was empty
        return prevNum
    finally:
        fname.Close()
193 
194 
195 
@_decos.forking
def PRWEntries(fileName, integral=False):
    """Determine the number of entries in a pileup-reweighting (PRW) file.

    Sums over every object in the top-level ``PileupReweighting`` directory whose
    key name contains ``pileup``.

    @param fileName Path/URL of the PRW file, opened read-only through ROOT
    @param integral If True sum each histogram's ``Integral()``, otherwise its
                    ``GetEntries()``
    @return The total converted to int, or None if the file cannot be opened or
            has no ``PileupReweighting`` key
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')
    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The file was previously never closed; release it on every exit path.
    try:
        rundir = None
        for key in fname.GetListOfKeys():
            if key.GetName() == 'PileupReweighting':
                rundir = fname.Get('PileupReweighting')
                break

        # Not a PRW file
        if rundir is None:
            return None

        total = 0
        for key in rundir.GetListOfKeys():
            histName = key.GetName()
            if 'pileup' not in histName:
                # Was not one of our histograms
                continue
            msg.debug('Working on file '+fileName+' histo '+histName)
            hist = rundir.Get(histName)
            total += hist.Integral() if integral else hist.GetEntries()

        # Make sure we return an int for the number of events
        return int(total)
    finally:
        fname.Close()
236 
237 
238 
@_decos.forking
def ROOTGetSize(filename):
    """Get the size of a file via ROOT's TFile.

    The file is opened with a ``filetype=raw`` URL option appended. For names
    starting with ``https`` or ``davs`` that already contain a ``?`` the option
    is joined with ``&``; in every other case it is joined with ``?``.

    @param filename Path/URL of the file
    @return Value of ``TFile.GetSize()``, or None if a ReferenceError was raised
            while opening the file or querying its size
    """
    root = import_root()

    try:
        msg.debug('Calling TFile.Open for {0}'.format(filename))
        isWebUrl = filename.startswith(("https", "davs"))
        if isWebUrl and "?" in filename:
            # URL already carries a query string, so extend it
            extraparam = '&filetype=raw'
        else:
            extraparam = '?filetype=raw'
        fname = root.TFile.Open(filename + extraparam, 'READ')
        fsize = fname.GetSize()
        msg.debug('Got size {0} from TFile.GetSize'.format(fsize))
    except ReferenceError:
        msg.error('Failed to get size of {0}'.format(filename))
        return None
    else:
        fname.Close()
        return fsize
273 
274 
275 
def urlType(filename):
    """Return the access type for a file URL, chosen by its scheme prefix.

    @param filename File name or URL (a string)
    @return 'dcap' for ``dcap:``; 'root' for ``root:``, ``https:`` and ``davs:``;
            'rfio' for ``rfio:``; 'posix' for ``file:`` and for anything else
    """
    schemeToAccess = (
        ('dcap:', 'dcap'),
        ('root:', 'root'),
        ('rfio:', 'rfio'),
        ('file:', 'posix'),
        ('https:', 'root'),
        ('davs:', 'root'),
    )
    for scheme, accessType in schemeToAccess:
        if filename.startswith(scheme):
            return accessType
    # No recognised scheme: treat as a plain local path
    return 'posix'
293 
python.trfFileUtils.ROOTGetSize
def ROOTGetSize(filename)
Get the size of a file via ROOT's TFile.
Definition: trfFileUtils.py:248
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:52
python.RootUtils.import_root
def import_root(batch=True)
functions --------------------------------------------------------------—
Definition: RootUtils.py:22
python.trfFileUtils.PRWEntries
def PRWEntries(fileName, integral=False)
Determines number of entries in PRW file.
Definition: trfFileUtils.py:206
python.trfFileUtils.NTUPEntries
def NTUPEntries(fileName, treeNames)
Determines number of entries in NTUP file with given tree names.
Definition: trfFileUtils.py:153
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
python.trfFileUtils.AthenaLiteFileInfo
def AthenaLiteFileInfo(filename, filetype, retrieveKeys=athFileInterestingKeys)
New lightweight interface to getting a single file's metadata.
Definition: trfFileUtils.py:23
python.trfFileUtils.urlType
def urlType(filename)
Return the LAN access type for a file URL.
Definition: trfFileUtils.py:279
generate::GetEntries
double GetEntries(TH1D *h, int ilow, int ihi)
Definition: rmsFrac.cxx:20
str
Definition: BTagTrackIpAccessor.cxx:11
python.trfFileUtils.HISTEntries
def HISTEntries(fileName)
Determines number of events in a HIST file.
Definition: trfFileUtils.py:55