ATLAS Offline Software
Functions | Variables
python.trfFileUtils Namespace Reference

Functions

def AthenaLiteFileInfo (filename, filetype, retrieveKeys=athFileInterestingKeys)
 New lightweight interface to getting a single file's metadata. More...
 
def HISTEntries (fileName)
 Determines number of events in a HIST file. More...
 
def NTUPEntries (fileName, treeNames)
 Determines number of entries in NTUP file with given tree names. More...
 
def PRWEntries (fileName, integral=False)
 Determines number of entries in PRW file. More...
 
def PHYSVALEntries (fileName, integral=False)
 Determines number of entries in NTUP_PHYSVAL file. More...
 
def ROOTGetSize (filename)
 Get the size of a file via ROOT's TFile. More...
 
def urlType (filename)
 Return the LAN access type for a file URL. More...
 

Variables

 msg
 
 athFileInterestingKeys
 

Function Documentation

◆ AthenaLiteFileInfo()

def python.trfFileUtils.AthenaLiteFileInfo (   filename,
  filetype,
  retrieveKeys = athFileInterestingKeys 
)

New lightweight interface to getting a single file's metadata.

Definition at line 23 of file trfFileUtils.py.

def AthenaLiteFileInfo(filename, filetype, retrieveKeys = athFileInterestingKeys):
    """Fetch a selection of metadata entries for a single file.

    The metadata is read with PyUtils.MetaReader.read_metadata in 'lite'
    mode and only the entries named in retrieveKeys are kept.

    Parameters:
        filename: File to inspect; also used as the key of the returned
            dictionary.
        filetype: Only used in the debug message emitted on entry.
        retrieveKeys: Iterable of metadata keys to copy into the result.

    Returns:
        A dictionary {filename: {key: value, ...}}, or None when reading
        the metadata raised ValueError, AssertionError or ReferenceError.
        Keys absent from the metadata are skipped with a warning. The
        'file_type' value is lower-cased.
    """
    msg.debug('Calling AthenaLiteFileInfo for {0} (type {1})'.format(filename, filetype))
    from PyUtils.MetaReader import read_metadata

    try:
        fileMeta = read_metadata(filename, None, 'lite')[filename]
        msg.debug('read_metadata came back for {0}'.format(filename))

        selected = {}
        for wanted in retrieveKeys:
            msg.debug('Looking for key {0}'.format(wanted))
            try:
                value = fileMeta[wanted]
                if wanted == 'file_type':
                    # File type strings are normalised to lower case
                    value = value.lower()
                selected[wanted] = value
            except KeyError:
                # A missing key is not fatal: report it and carry on
                msg.warning('Missing key in athFile info: {0}'.format(wanted))
    except (ValueError, AssertionError, ReferenceError):
        msg.error('Problem in getting metadata for {0}'.format(filename))
        return None

    result = {filename: selected}
    msg.debug('Returning {0}'.format(result))
    return result
44 

◆ HISTEntries()

def python.trfFileUtils.HISTEntries (   fileName)

Determines number of events in a HIST file.

Basically taken from PyJobTransformsCore.trfutil.MonitorHistFile

Parameters
fileName: Path to the HIST file.
Returns
  • Number of events.
  • None if the determination failed.
Note
Use the PyUtils forking decorator to ensure that ROOT is run completely within a child process and will not 'pollute' the parent python process with unthread-safe bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)

Definition at line 55 of file trfFileUtils.py.

def HISTEntries(fileName):
    """Determine the number of events in a HIST file.

    The single 'run_*' directory (ignoring 'run_multiple') is located and
    the histogram GLOBAL/DQTDataFlow/events_lb below it is inspected. For
    temporary files (name containing 'tmp.HIST_') one such histogram is
    looked up in every sub-directory of the run directory whose name
    contains 'lb', and the per-histogram counts are summed.

    Within one histogram all non-empty bins that are scanned must hold the
    same value; that value is the event count of the histogram.

    Parameters:
        fileName: Path to the HIST file.

    Returns:
        The number of events, or None if the file could not be opened or
        the determination failed (no or several run directories, missing
        histogram, negative or inconsistent bin contents).
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The file is open from here on. A single finally clause closes it on
    # every exit path, including the "two run_ directories" early return,
    # which previously left the file open.
    try:
        rundir = None
        for key in fname.GetListOfKeys():
            name = key.GetName()
            if name.startswith('run_') and name != 'run_multiple':
                if rundir is not None:
                    msg.warning('Found two run_ directories in HIST file %s: %s and %s', fileName, rundir, name)
                    return None
                rundir = name

        if rundir is None:
            msg.warning( 'Unable to find run directory in HIST file %s', fileName )
            return None

        msg.info( 'Using run directory %s for event counting of HIST file %s. ', rundir, fileName )

        possibleLBs = []
        if 'tmp.HIST_' in fileName:
            msg.info( 'Special case for temporary HIST file {0}. '.format( fileName ) )
            runDirectory = fname.Get('{0}'.format(rundir))
            for subdir in runDirectory.GetListOfKeys():
                if 'lb' in subdir.GetName():
                    msg.info( 'Using {0} in tmp HIST file {1}. '.format(subdir.GetName(), fileName ) )
                    possibleLBs.append(rundir+'/'+str(subdir.GetName())+'/GLOBAL/DQTDataFlow/events_lb')
        else:
            msg.info( 'Classical case for HIST file {0}. '.format( fileName ) )
            possibleLBs.append('%s/GLOBAL/DQTDataFlow/events_lb' % rundir)

        if not possibleLBs:
            msg.warning( 'Unable to find events_lb histogram in HIST file %s', fileName )
            return None

        nev = 0
        for hpath in possibleLBs:
            h = fname.Get(hpath)

            if not isinstance( h, root.TH1 ):
                msg.warning( 'Unable to retrieve %s in HIST file %s.', hpath, fileName )
                return None

            nevLoc = 0
            # NOTE(review): range(1, nBinsX) does not visit bin nBinsX itself;
            # kept as in the original - confirm whether skipping the last bin
            # is intended.
            for i in range(1, h.GetNbinsX()):
                binContent = h[i]

                if binContent < 0:
                    msg.warning( 'Negative number of events for step %s in HIST file %s.', h.GetXaxis().GetBinLabel(i), fileName )
                    return None

                if binContent == 0:
                    continue

                if nevLoc == 0:
                    # First non-empty bin defines the expected count
                    nevLoc = binContent
                elif nevLoc != binContent:
                    msg.warning( 'Mismatch in events per step in HIST file %s; most recent step seen is %s.', fileName, h.GetXaxis().GetBinLabel(i) )
                    return None

            nev += nevLoc

        return nev
    finally:
        fname.Close()
138 
139 
140 

◆ NTUPEntries()

def python.trfFileUtils.NTUPEntries (   fileName,
  treeNames 
)

Determines number of entries in NTUP file with given tree names.

Basically taken from PyJobTransformsCore.trfutil.ntup_entries.

Parameters
fileName: Path to the NTUP file.
treeNames: Tree name or list of tree names. In the latter case it is checked that all trees contain the same number of events.
Returns
  • Number of entries.
  • None if the determination failed.
Note
Use the PyUtils forking decorator to ensure that ROOT is run completely within a child process and will not 'pollute' the parent python process with unthread-safe bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)

Definition at line 153 of file trfFileUtils.py.

def NTUPEntries(fileName, treeNames):
    """Determine the number of entries in an NTUP file.

    Parameters:
        fileName: Path to the NTUP file.
        treeNames: A tree name or a list of tree names. With several
            names, all trees must report the same number of entries.

    Returns:
        The common number of entries of the requested trees, or None if
        the file could not be opened, a tree could not be retrieved, a
        negative entry count was reported, the trees disagree, or no
        tree name was given.
    """
    if not isinstance( treeNames, list ):
        treeNames = [treeNames]

    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The finally clause closes the file on every exit path; the early
    # returns below previously left it open.
    try:
        # Stays None for an empty treeNames list, so that case returns None
        # instead of failing on an unassigned local.
        numberOfEntries = None
        prevTree = None

        for treeName in treeNames:
            tree = fname.Get(treeName)

            if not isinstance(tree, root.TTree):
                msg.warning('Unable to retrieve tree %s in NTUP file %s.', treeName, fileName )
                return None

            num = tree.GetEntriesFast()

            if not num>=0:
                msg.warning('GetEntriesFast returned non positive value for tree %s in NTUP file %s.', treeName, fileName )
                return None

            # Compare against the count of the previous tree. The original
            # compared against a variable that was never updated, so this
            # check could never trigger.
            if numberOfEntries is not None and numberOfEntries != num:
                msg.warning( "Found diffferent number of entries in tree %s and tree %s of file %s.", treeName, prevTree, fileName )
                return None

            numberOfEntries = num
            prevTree = treeName

        return numberOfEntries
    finally:
        fname.Close()
193 
194 

◆ PHYSVALEntries()

def python.trfFileUtils.PHYSVALEntries (   fileName,
  integral = False 
)

Determines number of entries in NTUP_PHYSVAL file.

Parameters
fileName: Path to the PHYSVAL file.
integral: Returns the sum of weights if true.
Returns
  • Number of entries.
  • Sum of weights if integral is true.
  • None if the determination failed.
Note
Use the PyCmt forking decorator to ensure that ROOT is run completely within a child process and will not 'pollute' the parent python process with unthread-safe bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)

Definition at line 249 of file trfFileUtils.py.

def PHYSVALEntries(fileName, integral=False):
    """Determine the number of entries in an NTUP_PHYSVAL file.

    The value is taken from the object stored at
    /EventInfo/EventInfo_actualInteractionsPerCrossing.

    Parameters:
        fileName: Path to the PHYSVAL file.
        integral: If true, return Integral() of that object instead of
            its entry count.

    Returns:
        int(GetEntries()) of the object, or its Integral() when integral
        is true, or None if the file could not be opened or the object is
        not present.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The result is computed before the file is closed in the finally
    # clause; previously the file was never closed.
    try:
        aipc = fname.Get("/EventInfo/EventInfo_actualInteractionsPerCrossing")

        # Not PHYSVAL...
        # NOTE(review): a missing object presumably comes back from PyROOT as
        # a falsy null-pointer proxy rather than None, so both are tested -
        # confirm against the ROOT version in use.
        if aipc is None or not aipc:
            return None

        # If we want the weights, give us the weights
        if integral:
            return aipc.Integral()

        # Otherwise we just want the entries
        return int(aipc.GetEntries())
    finally:
        fname.Close()
270 
271 

◆ PRWEntries()

def python.trfFileUtils.PRWEntries (   fileName,
  integral = False 
)

Determines number of entries in PRW file.

Parameters
fileName: Path to the PRW file.
integral: Returns the sum of weights if true.
Returns
  • Number of entries.
  • Sum of weights if integral is true.
  • None if the determination failed.
Note
Use the PyCmt forking decorator to ensure that ROOT is run completely within a child process and will not 'pollute' the parent python process with unthread-safe bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)

Definition at line 206 of file trfFileUtils.py.

def PRWEntries(fileName, integral=False):
    """Determine the number of entries in a PRW file.

    Every object in the top-level 'PileupReweighting' directory whose name
    contains 'pileup' contributes to the total.

    Parameters:
        fileName: Path to the PRW file.
        integral: If true, sum Integral() of the objects instead of
            GetEntries().

    Returns:
        The total converted to int, or None if the file could not be
        opened or has no 'PileupReweighting' key.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The total is computed before the file is closed in the finally
    # clause; previously the file was never closed.
    try:
        rundir = None

        for key in fname.GetListOfKeys():
            if key.GetName()=='PileupReweighting':
                rundir = fname.Get('PileupReweighting')
                break

        # Not PRW...
        if rundir is None:
            return None

        total = 0
        for key in rundir.GetListOfKeys():
            histoName = key.GetName()
            if 'pileup' not in histoName:
                # Was not one of our histograms
                continue
            msg.debug('Working on file '+fileName+' histo '+histoName)
            histo = rundir.Get(histoName)
            if integral:
                total += histo.Integral()
            else:
                total += histo.GetEntries()

        # Make sure we return an int for the number of events
        return int(total)
    finally:
        fname.Close()
236 
237 

◆ ROOTGetSize()

def python.trfFileUtils.ROOTGetSize (   filename)

Get the size of a file via ROOT's TFile.

Use TFile.Open to retrieve a ROOT filehandle, which will deal with all non-posix filesystems. Return the GetSize() value. The option filetype=raw is added to ensure this works for non-ROOT files too (e.g. BS)

Note
Use the PyUtils forking decorator to ensure that ROOT is run completely within a child process and will not 'pollute' the parent python process with unthread-safe bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
Parameters
filename: Filename to get size of
Returns
fileSize or None if there was a problem

Definition at line 282 of file trfFileUtils.py.

def ROOTGetSize(filename):
    """Get the size of a file through ROOT's TFile.

    The file is opened with TFile.Open with the option filetype=raw
    appended to its name, and the value of GetSize() is returned.

    Parameters:
        filename: Name or URL of the file.

    Returns:
        The value of TFile.GetSize(), or None if the file could not be
        accessed.
    """
    root = import_root()

    # For https/davs URLs that already carry a query string the option has
    # to be appended with '&'; in every other case it starts the query.
    if filename.startswith(("https", "davs")) and "?" in filename:
        extraparam = '&filetype=raw'
    else:
        extraparam = '?filetype=raw'

    try:
        msg.debug('Calling TFile.Open for {0}'.format(filename))
        fname = root.TFile.Open(filename + extraparam, 'READ')
        fsize = fname.GetSize()
        msg.debug('Got size {0} from TFile.GetSize'.format(fsize))
    except (ReferenceError, OSError):
        # ReferenceError: calling GetSize() on a null file handle.
        # NOTE(review): OSError is presumably what newer PyROOT raises from
        # TFile.Open itself when the file cannot be opened - confirm against
        # the ROOT version in use.
        msg.error('Failed to get size of {0}'.format(filename))
        return None

    fname.Close()
    return fsize
307 
308 

◆ urlType()

def python.trfFileUtils.urlType (   filename)

Return the LAN access type for a file URL.

Parameters
filename: Name of file to examine
Returns
  • String with LAN protocol

Definition at line 313 of file trfFileUtils.py.

def urlType(filename):
    """Return the LAN access type for a file URL.

    Parameters:
        filename: Name of the file to examine.

    Returns:
        'dcap', 'root' or 'rfio' for the corresponding URL schemes, 'root'
        for https: and davs: URLs, and 'posix' for file: URLs and for
        anything else.
    """
    # URL prefix -> access type; the prefixes are mutually exclusive
    accessTypes = (
        ('dcap:', 'dcap'),
        ('root:', 'root'),
        ('rfio:', 'rfio'),
        ('file:', 'posix'),
        ('https:', 'root'),
        ('davs:', 'root'),
    )
    for prefix, access in accessTypes:
        if filename.startswith(prefix):
            return access
    # No recognised scheme: treat as a plain local path
    return 'posix'
327 

Variable Documentation

◆ athFileInterestingKeys

python.trfFileUtils.athFileInterestingKeys

Definition at line 20 of file trfFileUtils.py.

◆ msg

python.trfFileUtils.msg

Definition at line 10 of file trfFileUtils.py.

python.trfFileUtils.PHYSVALEntries
def PHYSVALEntries(fileName, integral=False)
Determines number of entries in NTUP_PHYSVAL file.
Definition: trfFileUtils.py:249
python.trfFileUtils.ROOTGetSize
def ROOTGetSize(filename)
Get the size of a file via ROOT's TFile.
Definition: trfFileUtils.py:282
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.MetaReader.read_metadata
def read_metadata(filenames, file_type=None, mode='lite', promote=None, meta_key_filter=None, unique_tag_info_values=True, ignoreNonExistingLocalFiles=False)
Definition: MetaReader.py:52
python.RootUtils.import_root
def import_root(batch=True)
functions --------------------------------------------------------------—
Definition: RootUtils.py:22
python.trfFileUtils.PRWEntries
def PRWEntries(fileName, integral=False)
Determines number of entries in PRW file.
Definition: trfFileUtils.py:206
python.trfFileUtils.NTUPEntries
def NTUPEntries(fileName, treeNames)
Determines number of entries in NTUP file with given tree names.
Definition: trfFileUtils.py:153
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
python.trfFileUtils.AthenaLiteFileInfo
def AthenaLiteFileInfo(filename, filetype, retrieveKeys=athFileInterestingKeys)
New lightweight interface to getting a single file's metadata.
Definition: trfFileUtils.py:23
python.trfFileUtils.urlType
def urlType(filename)
Return the LAN access type for a file URL.
Definition: trfFileUtils.py:313
generate::GetEntries
double GetEntries(TH1D *h, int ilow, int ihi)
Definition: rmsFrac.cxx:20
str
Definition: BTagTrackIpAccessor.cxx:11
python.trfFileUtils.HISTEntries
def HISTEntries(fileName)
Determines number of events in a HIST file.
Definition: trfFileUtils.py:55