ATLAS Offline Software
Loading...
Searching...
No Matches
python.trfFileUtils Namespace Reference

Functions

 AthenaLiteFileInfo (filename, filetype, retrieveKeys=athFileInterestingKeys)
 New lightweight interface to getting a single file's metadata.
 HISTEntries (fileName)
 Determines number of events in a HIST file.
 NTUPEntries (fileName, treeNames)
 Determines number of entries in NTUP file with given tree names.
 PRWEntries (fileName, integral=False)
 Determines number of entries in PRW file.
 PHYSVALEntries (fileName, integral=False)
 Determines number of entries in NTUP_PHYSVAL file.
 ROOTGetSize (filename)
 Get the size of a file via ROOT's TFile.
 urlType (filename)
 Return the LAN access type for a file URL.

Variables

 msg = logging.getLogger(__name__)
list athFileInterestingKeys = ['file_size', 'file_guid', 'file_type', 'nentries']

Function Documentation

◆ AthenaLiteFileInfo()

python.trfFileUtils.AthenaLiteFileInfo ( filename,
filetype,
retrieveKeys = athFileInterestingKeys )

New lightweight interface to getting a single file's metadata.

Definition at line 23 of file trfFileUtils.py.

def AthenaLiteFileInfo(filename, filetype, retrieveKeys = athFileInterestingKeys):
    """Fetch a selection of metadata for a single file via MetaReader ('lite' mode).

    @param filename File to read the metadata from.
    @param filetype File type; only used in the debug log message.
    @param retrieveKeys Metadata keys to copy into the result. Keys missing
                        from the metadata are skipped with a warning.
    @return Dictionary of the form {filename: {key: value, ...}}, or None if
            reading the metadata raised ValueError, AssertionError or
            ReferenceError. The 'file_type' value is lower-cased.
    """
    msg.debug('Calling AthenaLiteFileInfo for {0} (type {1})'.format(filename, filetype))
    from PyUtils.MetaReader import read_metadata

    result = {}
    try:
        fileMeta = read_metadata(filename, None, 'lite')[filename]
        msg.debug('read_metadata came back for {0}'.format(filename))
        selected = {}
        result[filename] = selected
        for wanted in retrieveKeys:
            msg.debug('Looking for key {0}'.format(wanted))
            try:
                value = fileMeta[wanted]
                # File types are normalised to lower case; everything else is passed through
                if wanted == 'file_type':
                    value = value.lower()
                selected[wanted] = value
            except KeyError:
                msg.warning('Missing key in athFile info: {0}'.format(wanted))
    except (ValueError, AssertionError, ReferenceError):
        msg.error('Problem in getting metadata for {0}'.format(filename))
        return None

    msg.debug('Returning {0}'.format(result))
    return result


◆ HISTEntries()

python.trfFileUtils.HISTEntries ( fileName)

Determines number of events in a HIST file.

Basically taken from PyJobTransformsCore.trfutil.MonitorHistFile

Parameters
fileName: Path to the HIST file.
Returns
  • Number of events.
  • None if the determination failed.
Note
Use the PyUtils forking decorator to ensure that ROOT runs entirely within a child process and does not 'pollute' the parent Python process with non-thread-safe code (otherwise strange hangs are observed on subsequent uses of ROOT).

Definition at line 55 of file trfFileUtils.py.

def HISTEntries(fileName):
    """Determine the number of events in a HIST file.

    The file must contain exactly one top-level ``run_*`` key (``run_multiple``
    is ignored). Events are read from the ``GLOBAL/DQTDataFlow/events_lb``
    histogram below that directory. For temporary files (file name contains
    ``tmp.HIST_``) one such histogram is read from every sub-directory of the
    run directory whose name contains ``lb`` and the per-histogram counts are
    summed.

    Within a histogram every non-empty inspected bin must hold the same value;
    that value is the histogram's event count.

    @param fileName Path to the HIST file.
    @return Number of events, or None if the determination failed.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The file is open from here on. The finally clause closes it on every exit
    # path, including the "two run_ directories" return and unexpected
    # exceptions, which previously left the handle open.
    try:
        rundir = None

        for key in fname.GetListOfKeys():
            name = key.GetName()

            if name.startswith('run_') and name != 'run_multiple':
                if rundir is not None:
                    msg.warning('Found two run_ directories in HIST file %s: %s and %s', fileName, rundir, name)
                    return None
                rundir = name

        if rundir is None:
            msg.warning( 'Unable to find run directory in HIST file %s', fileName )
            return None

        msg.info( 'Using run directory %s for event counting of HIST file %s. ', rundir, fileName )

        hpath = '%s/GLOBAL/DQTDataFlow/events_lb' % rundir
        possibleLBs = []
        if 'tmp.HIST_' in fileName:
            msg.info( 'Special case for temporary HIST file {0}. '.format( fileName ) )
            h = fname.Get('{0}'.format(rundir))
            for directories in h.GetListOfKeys() :
                if 'lb' in directories.GetName():
                    msg.info( 'Using {0} in tmp HIST file {1}. '.format(directories.GetName(), fileName ) )
                    hpath = rundir+'/'+str(directories.GetName())+'/GLOBAL/DQTDataFlow/events_lb'
                    possibleLBs.append(hpath)
        else:
            msg.info( 'Classical case for HIST file {0}. '.format( fileName ) )
            possibleLBs.append(hpath)

        nev = 0
        if len(possibleLBs) == 0:
            msg.warning( 'Unable to find events_lb histogram in HIST file %s', fileName )
            return None

        for hpath in possibleLBs:
            h = fname.Get(hpath)

            if not isinstance( h, root.TH1 ):
                msg.warning( 'Unable to retrieve %s in HIST file %s.', hpath, fileName )
                return None

            nBinsX = h.GetNbinsX()
            nevLoc = 0

            # NOTE(review): range(1, nBinsX) stops before bin nBinsX, so the last
            # regular bin is never inspected - confirm that this is intended.
            for i in range(1, nBinsX):

                if h[i] < 0:
                    msg.warning( 'Negative number of events for step %s in HIST file %s.', h.GetXaxis().GetBinLabel(i), fileName )
                    return None

                elif h[i] == 0:
                    # Empty steps carry no information
                    continue

                if nevLoc == 0:
                    # First non-empty step defines the expected count
                    nevLoc = h[i]

                elif nevLoc != h[i]:
                    msg.warning( 'Mismatch in events per step in HIST file %s; most recent step seen is %s.', fileName, h.GetXaxis().GetBinLabel(i) )
                    return None

            nev += nevLoc

        return nev
    finally:
        fname.Close()


◆ NTUPEntries()

python.trfFileUtils.NTUPEntries ( fileName,
treeNames )

Determines number of entries in NTUP file with given tree names.

Basically taken from PyJobTransformsCore.trfutil.ntup_entries.

Parameters
fileName: Path to the NTUP file.
treeNames: Tree name or list of tree names. In the latter case it is checked that all trees contain the same number of entries.
Returns
  • Number of entries.
  • None if the determination failed.
Note
Use the PyUtils forking decorator to ensure that ROOT runs entirely within a child process and does not 'pollute' the parent Python process with non-thread-safe code (otherwise strange hangs are observed on subsequent uses of ROOT).

Definition at line 153 of file trfFileUtils.py.

def NTUPEntries(fileName, treeNames):
    """Determine the number of entries in an NTUP file for the given tree(s).

    @param fileName Path to the NTUP file.
    @param treeNames Tree name or list of tree names. With several names all
                     trees must contain the same number of entries.
    @return Number of entries, or None if the determination failed: the file
            could not be opened, a name does not refer to a TTree, a tree
            reports a negative entry count, the trees disagree, or no tree
            name was given.
    """
    if not isinstance( treeNames, list ):
        treeNames=[treeNames]

    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # Stays None until the first tree has been read, so an empty treeNames list
    # yields None instead of an UnboundLocalError.
    numberOfEntries = None
    prevTree = None

    # The finally clause closes the file on the early-return paths as well
    try:
        for treeName in treeNames:

            tree = fname.Get(treeName)

            if not isinstance(tree, root.TTree):
                return None

            num = tree.GetEntriesFast()

            if not num>=0:
                msg.warning('GetEntriesFast returned non positive value for tree %s in NTUP file %s.', treeName, fileName )
                return None

            # Compare against the count of the previous tree. The original code
            # compared against a variable that was never updated, so this
            # consistency check could never trigger.
            if numberOfEntries is not None and numberOfEntries != num:
                msg.warning( "Found diffferent number of entries in tree %s and tree %s of file %s.", treeName, prevTree, fileName )
                return None

            numberOfEntries = num
            prevTree = treeName
    finally:
        fname.Close()

    return numberOfEntries


◆ PHYSVALEntries()

python.trfFileUtils.PHYSVALEntries ( fileName,
integral = False )

Determines number of entries in NTUP_PHYSVAL file.

Parameters
fileName: Path to the PHYSVAL file.
integral: Returns the sum of weights if true.
Returns
  • Number of entries.
  • Sum of weights if integral is true.
  • None if the determination failed.
Note
Use the PyCmt forking decorator to ensure that ROOT runs entirely within a child process and does not 'pollute' the parent Python process with non-thread-safe code (otherwise strange hangs are observed on subsequent uses of ROOT).

Definition at line 249 of file trfFileUtils.py.

def PHYSVALEntries(fileName, integral=False):
    """Determine the number of entries in an NTUP_PHYSVAL file.

    The count is taken from the object stored at
    ``/EventInfo/EventInfo_actualInteractionsPerCrossing``.

    @param fileName Path to the PHYSVAL file.
    @param integral If true, return the object's Integral() (sum of weights)
                    instead of its number of entries.
    @return Number of entries as int, the integral if requested, or None if
            the file could not be opened or does not contain the object.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The original never closed the file; the finally clause releases the
    # handle after the return value has been computed.
    try:
        aipc = fname.Get("/EventInfo/EventInfo_actualInteractionsPerCrossing")

        if not aipc:
            # Not PHYSVAL...
            return None

        # If we want the weights, give us the weights
        if integral:
            return aipc.Integral()

        # Otherwise we just want the entries
        return int(aipc.GetEntries())
    finally:
        fname.Close()


◆ PRWEntries()

python.trfFileUtils.PRWEntries ( fileName,
integral = False )

Determines number of entries in PRW file.

Parameters
fileName: Path to the PRW file.
integral: Returns the sum of weights if true.
Returns
  • Number of entries.
  • Sum of weights if integral is true.
  • None if the determination failed.
Note
Use the PyCmt forking decorator to ensure that ROOT runs entirely within a child process and does not 'pollute' the parent Python process with non-thread-safe code (otherwise strange hangs are observed on subsequent uses of ROOT).

Definition at line 206 of file trfFileUtils.py.

def PRWEntries(fileName, integral=False):
    """Determine the number of entries in a PRW file.

    Sums over every object in the top-level ``PileupReweighting`` directory
    whose key name contains ``pileup``.

    @param fileName Path to the PRW file.
    @param integral If true, sum the Integral() of each histogram (sum of
                    weights) instead of its number of entries.
    @return The total converted to int (so a fractional sum of weights is
            truncated), or None if the file could not be opened or has no
            ``PileupReweighting`` directory.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The original never closed the file; the finally clause releases the
    # handle on every exit path.
    try:
        rundir = None

        for key in fname.GetListOfKeys():
            if key.GetName()=='PileupReweighting':
                rundir = fname.Get('PileupReweighting')
                break

        # Not PRW...
        if rundir is None:
            return None

        total = 0
        for key in rundir.GetListOfKeys():
            histName = key.GetName()
            if 'pileup' not in histName:
                # Was not one of our histograms
                continue
            msg.debug('Working on file '+fileName+' histo '+histName)
            hist = rundir.Get(histName)
            total += hist.Integral() if integral else hist.GetEntries()

        # Make sure we return an int for the number of events
        return int(total)
    finally:
        fname.Close()

TGraphErrors * GetEntries(TH2F *histo)

◆ ROOTGetSize()

python.trfFileUtils.ROOTGetSize ( filename)

Get the size of a file via ROOT's TFile.

Use TFile.Open to retrieve a ROOT filehandle, which will deal with all non-posix filesystems. Return the GetSize() value. The option filetype=raw is added to ensure this works for non-ROOT files too (e.g. BS)

Note
Use the PyUtils forking decorator to ensure that ROOT runs entirely within a child process and does not 'pollute' the parent Python process with non-thread-safe code (otherwise strange hangs are observed on subsequent uses of ROOT).
Parameters
filename: Name of the file to get the size of.
Returns
fileSize or None if there was a problem

Definition at line 282 of file trfFileUtils.py.

def ROOTGetSize(filename):
    """Return the size of a file as reported by ROOT's TFile.GetSize().

    The file is opened through TFile.Open with ``filetype=raw`` appended to
    the name. For names starting with ``https`` or ``davs`` that already
    contain a ``?`` the option is appended with ``&``; in every other case it
    is appended with ``?``.

    @param filename Name of the file to get the size of.
    @return File size, or None if a ReferenceError was raised while opening
            the file or reading its size.
    """
    root = import_root()

    try:
        msg.debug('Calling TFile.Open for {0}'.format(filename))
        # Only web-style URLs are checked for an existing query string
        hasQuery = filename.startswith(("https", "davs")) and "?" in filename
        rawOption = '&filetype=raw' if hasQuery else '?filetype=raw'
        handle = root.TFile.Open(filename + rawOption, 'READ')
        size = handle.GetSize()
        msg.debug('Got size {0} from TFile.GetSize'.format(size))
    except ReferenceError:
        msg.error('Failed to get size of {0}'.format(filename))
        return None

    handle.Close()
    return size


◆ urlType()

python.trfFileUtils.urlType ( filename)

Return the LAN access type for a file URL.

Parameters
filename: Name of the file to examine.
Returns
  • String with LAN protocol

Definition at line 313 of file trfFileUtils.py.

def urlType(filename):
    """Return the LAN access type for a file URL.

    @param filename Name of the file to examine.
    @return 'dcap', 'root' or 'rfio' for the matching URL scheme; 'root' also
            for 'https:' and 'davs:' URLs; 'posix' for 'file:' URLs and for
            anything without a recognised prefix.
    """
    accessByPrefix = (
        ('dcap:', 'dcap'),
        ('root:', 'root'),
        ('rfio:', 'rfio'),
        ('file:', 'posix'),
        ('https:', 'root'),
        ('davs:', 'root'),
    )
    for prefix, accessType in accessByPrefix:
        if filename.startswith(prefix):
            return accessType
    # No recognised scheme: treat as a plain path
    return 'posix'


Variable Documentation

◆ athFileInterestingKeys

list python.trfFileUtils.athFileInterestingKeys = ['file_size', 'file_guid', 'file_type', 'nentries']

Definition at line 20 of file trfFileUtils.py.

◆ msg

python.trfFileUtils.msg = logging.getLogger(__name__)

Definition at line 10 of file trfFileUtils.py.