ATLAS Offline Software
Loading...
Searching...
No Matches
trfFileUtils.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
8
9import logging
10msg = logging.getLogger(__name__)
11
# @note Use the PyUtils forking decorator to ensure that ROOT is run completely within
# a child process and will not 'pollute' the parent python process with non-thread-safe
# bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
15import PyUtils.Decorators as _decos
16
17from PyUtils.RootUtils import import_root
18
19# Use a stripped down key list, as we retrieve only 'fast' metadata
20athFileInterestingKeys = ['file_size', 'file_guid', 'file_type', 'nentries']
21
22
def AthenaLiteFileInfo(filename, filetype, retrieveKeys = athFileInterestingKeys):
    """Lightweight interface to getting a single file's metadata.

    Calls ``read_metadata`` in 'lite' mode for ``filename`` and copies the
    requested keys into a fresh dictionary.

    @param filename      File to inspect; also the key of the returned dictionary.
    @param filetype      Only used in the debug message emitted on entry.
    @param retrieveKeys  Iterable of metadata keys to copy.
    @return ``{filename: {key: value, ...}}`` holding every requested key that
            was present (the 'file_type' value is lower-cased), or ``None`` if
            a ValueError, AssertionError or ReferenceError was raised while
            reading. A key that is absent only produces a warning.
    """
    msg.debug('Calling AthenaLiteFileInfo for {0} (type {1})'.format(filename, filetype))
    from PyUtils.MetaReader import read_metadata

    metaDict = {}
    try:
        fileMeta = read_metadata(filename, None, 'lite')[filename]
        msg.debug('read_metadata came back for {0}'.format(filename))

        collected = {}
        metaDict[filename] = collected
        for wanted in retrieveKeys:
            msg.debug('Looking for key {0}'.format(wanted))
            try:
                value = fileMeta[wanted]
                if wanted == 'file_type':
                    value = value.lower()
                collected[wanted] = value
            except KeyError:
                # A missing key is tolerated: warn and carry on with the rest
                msg.warning('Missing key in athFile info: {0}'.format(wanted))
    except (ValueError, AssertionError, ReferenceError):
        msg.error('Problem in getting metadata for {0}'.format(filename))
        return None

    msg.debug('Returning {0}'.format(metaDict))
    return metaDict
44
45
@_decos.forking
def HISTEntries(fileName):
    """Determine the number of events in a HIST file.

    Exactly one top-level ``run_*`` key (other than ``run_multiple``) must be
    present. The event count is read from the ``GLOBAL/DQTDataFlow/events_lb``
    histogram below it. For temporary files (``tmp.HIST_`` in the file name)
    one such histogram is read for every key under the run directory whose
    name contains ``lb``, and the per-histogram counts are summed.

    Within each histogram all non-zero bins that are examined must hold the
    same value; that value is the histogram's event count.

    @param fileName  Path or URL handed to ``TFile.Open``.
    @return The summed event count, or ``None`` if the file cannot be opened,
            the run directory is missing or ambiguous, a histogram cannot be
            retrieved, a bin is negative, or the non-zero bins disagree.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # try/finally closes the file on every exit path, including the
    # "two run_ directories" return, which has no explicit Close() of its own.
    try:
        rundir = None
        for key in fname.GetListOfKeys():
            name = key.GetName()
            if name.startswith('run_') and name != 'run_multiple':
                if rundir is not None:
                    msg.warning('Found two run_ directories in HIST file %s: %s and %s', fileName, rundir, name)
                    return None
                rundir = name

        if rundir is None:
            msg.warning( 'Unable to find run directory in HIST file %s', fileName )
            return None

        msg.info( 'Using run directory %s for event counting of HIST file %s. ', rundir, fileName )

        hpath = '%s/GLOBAL/DQTDataFlow/events_lb' % rundir
        possibleLBs = []
        if 'tmp.HIST_' in fileName:
            msg.info( 'Special case for temporary HIST file {0}. '.format( fileName ) )
            h = fname.Get('{0}'.format(rundir))
            for directories in h.GetListOfKeys():
                if 'lb' in directories.GetName():
                    msg.info( 'Using {0} in tmp HIST file {1}. '.format(directories.GetName(), fileName ) )
                    hpath = rundir+'/'+str(directories.GetName())+'/GLOBAL/DQTDataFlow/events_lb'
                    possibleLBs.append(hpath)
        else:
            msg.info( 'Classical case for HIST file {0}. '.format( fileName ) )
            possibleLBs.append(hpath)

        if len(possibleLBs) == 0:
            msg.warning( 'Unable to find events_lb histogram in HIST file %s', fileName )
            return None

        nev = 0
        for hpath in possibleLBs:
            h = fname.Get(hpath)

            if not isinstance( h, root.TH1 ):
                msg.warning( 'Unable to retrieve %s in HIST file %s.', hpath, fileName )
                return None

            nBinsX = h.GetNbinsX()
            nevLoc = 0

            # NOTE(review): range(1, nBinsX) stops at bin nBinsX-1, so the bin
            # with index nBinsX is never examined. Kept exactly as found --
            # confirm whether skipping the last bin is intended.
            for i in range(1, nBinsX):
                content = h[i]

                if content < 0:
                    msg.warning( 'Negative number of events for step %s in HIST file %s.', h.GetXaxis().GetBinLabel(i), fileName )
                    return None

                if content == 0:
                    continue

                if nevLoc == 0:
                    # First non-zero bin defines the reference count
                    nevLoc = content
                elif nevLoc != content:
                    msg.warning( 'Mismatch in events per step in HIST file %s; most recent step seen is %s.', fileName, h.GetXaxis().GetBinLabel(i) )
                    return None

            nev += nevLoc

        return nev
    finally:
        fname.Close()
138
139
140
141
@_decos.forking
def NTUPEntries(fileName, treeNames):
    """Determine the number of entries in an NTUP file with given tree names.

    Every named tree must exist and all of them must report the same number
    of entries via ``GetEntriesFast``.

    @param fileName   Path or URL handed to ``TFile.Open``.
    @param treeNames  A single tree name or a list of tree names.
    @return The common entry count, or ``None`` if the file cannot be opened,
            a name does not resolve to a ``TTree``, a tree reports a negative
            count, the trees disagree, or ``treeNames`` is an empty list.
    """
    if not isinstance( treeNames, list ):
        treeNames = [treeNames]

    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # Initialised up front so that an empty treeNames list yields None
    # instead of an unbound local at the final return.
    numberOfEntries = None
    prevTree = None

    # try/finally closes the file on the early-return paths as well.
    try:
        for treeName in treeNames:
            tree = fname.Get(treeName)

            if not isinstance(tree, root.TTree):
                return None

            num = tree.GetEntriesFast()

            if not num >= 0:
                msg.warning('GetEntriesFast returned non positive value for tree %s in NTUP file %s.', treeName, fileName )
                return None

            # Compare against the count of the previously visited tree. The
            # reference must be updated on every iteration, otherwise this
            # consistency check can never trigger.
            if numberOfEntries is not None and numberOfEntries != num:
                msg.warning( "Found diffferent number of entries in tree %s and tree %s of file %s.", treeName, prevTree, fileName )
                return None

            numberOfEntries = num
            prevTree = treeName
    finally:
        fname.Close()

    return numberOfEntries
193
194
195
@_decos.forking
def PRWEntries(fileName, integral=False):
    """Determine the number of entries in a PRW file.

    Looks for a top-level ``PileupReweighting`` key and sums, over every
    object below it whose name contains ``pileup``, either ``GetEntries()``
    or (when ``integral`` is true) ``Integral()``.

    @param fileName  Path or URL handed to ``TFile.Open``.
    @param integral  Sum histogram integrals instead of entry counts.
    @return The total truncated to ``int``, or ``None`` if the file cannot be
            opened or has no ``PileupReweighting`` key.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The file is closed on every exit path; the total is a plain Python
    # number by the time the finally clause runs.
    try:
        rundir = None
        for key in fname.GetListOfKeys():
            if key.GetName() == 'PileupReweighting':
                rundir = fname.Get('PileupReweighting')
                break

        # Not PRW...
        if rundir is None:
            return None

        total = 0
        for key in rundir.GetListOfKeys():
            histName = key.GetName()
            if 'pileup' not in histName:
                # Was not one of our histograms
                continue
            msg.debug('Working on file %s histo %s', fileName, histName)
            hist = rundir.Get(histName)
            total += hist.Integral() if integral else hist.GetEntries()

        # Make sure we return an int for the number of events
        return int(total)
    finally:
        fname.Close()
236
237
238
@_decos.forking
def PHYSVALEntries(fileName, integral=False):
    """Determine the number of entries in an NTUP_PHYSVAL file.

    The count is taken from the object stored at
    ``/EventInfo/EventInfo_actualInteractionsPerCrossing``.

    @param fileName  Path or URL handed to ``TFile.Open``.
    @param integral  Return ``Integral()`` of that object instead of its
                     entry count.
    @return ``Integral()`` as returned by ROOT when ``integral`` is true,
            otherwise ``int(GetEntries())``; ``None`` if the file cannot be
            opened or the object is absent.
    """
    root = import_root()

    fname = root.TFile.Open(fileName, 'READ')

    if not (isinstance(fname, root.TFile) and fname.IsOpen()):
        return None

    # The return expressions are evaluated before the finally clause runs,
    # so the histogram is still readable when its value is extracted.
    try:
        aipc = fname.Get("/EventInfo/EventInfo_actualInteractionsPerCrossing")

        if not aipc:
            # Not PHYSVAL...
            return None

        # If we want the weights, give us the weights
        if integral:
            return aipc.Integral()

        # Otherwise we just want the entries
        return int(aipc.GetEntries())
    finally:
        fname.Close()
270
271
272
@_decos.forking
def ROOTGetSize(filename):
    """Get the size of a file via ROOT's TFile.

    The file is opened with a ``filetype=raw`` option appended to its name.
    For names starting with ``https`` or ``davs`` that already contain a
    ``?``, the option is joined with ``&``; in every other case with ``?``.

    @param filename  Path or URL of the file.
    @return The value of ``TFile.GetSize()``, or ``None`` if a ReferenceError
            was raised while opening the file or querying its size.
    """
    root = import_root()

    try:
        msg.debug('Calling TFile.Open for {0}'.format(filename))
        overHttp = filename.startswith("https") or filename.startswith("davs")
        if overHttp and "?" in filename:
            # A query string is already present: extend it
            extraparam = '&filetype=raw'
        else:
            extraparam = '?filetype=raw'
        fname = root.TFile.Open(filename + extraparam, 'READ')
        fsize = fname.GetSize()
        msg.debug('Got size {0} from TFile.GetSize'.format(fsize))
    except ReferenceError:
        msg.error('Failed to get size of {0}'.format(filename))
        return None

    fname.Close()
    return fsize
307
308
309
def urlType(filename):
    """Return the LAN access type for a file URL.

    The type is chosen from the URL prefix; anything that matches none of
    the known prefixes is treated as 'posix'.

    @param filename  File name or URL.
    @return One of 'dcap', 'root', 'rfio' or 'posix'.
    """
    prefixToAccess = (
        ('dcap:', 'dcap'),
        ('root:', 'root'),
        ('rfio:', 'rfio'),
        ('file:', 'posix'),
        ('https:', 'root'),
        ('davs:', 'root'),
    )
    for prefix, access in prefixToAccess:
        if filename.startswith(prefix):
            return access
    return 'posix'
327
TGraphErrors * GetEntries(TH2F *histo)
NTUPEntries(fileName, treeNames)
Determines number of entries in NTUP file with given tree names.
ROOTGetSize(filename)
Get the size of a file via ROOT's TFile.
AthenaLiteFileInfo(filename, filetype, retrieveKeys=athFileInterestingKeys)
New lightweight interface to getting a single file's metadata.
PRWEntries(fileName, integral=False)
Determines number of entries in PRW file.
PHYSVALEntries(fileName, integral=False)
Determines number of entries in NTUP_PHYSVAL file.
urlType(filename)
Return the LAN access type for a file URL.
HISTEntries(fileName)
Determines number of events in a HIST file.