ATLAS Offline Software
Loading...
Searching...
No Matches
root_pickle.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3#
4# File: root_pickle.py
5# Created: sss, 2004.
6# Purpose: Pickle python data into a root file, preserving references
7# to root objects.
8#
9
10"""Pickle python data into a root file, preserving references to root objects.
11
12This module allows pickling python
13objects into a root file. The python objects may contain
14references to named root objects. If one has set up a structure
15of python objects to hold root histograms, this provides a
16convenient way of saving and restoring your histograms.
17The pickled python data are stored in an additional string
18object in the root file; any root objects are stored as usual.
19(Thus, root files written by root_pickle can be
20read just like any other root file if you don't care about the
21python data.)
22
23Here's an example of writing a pickle:
24
25 import ROOT
26 from PyAnalysisUtils.root_pickle import dump_root
27 hlist = []
28 for i in range (10):
29 name = 'h%d' % i
30 hlist.append (ROOT.TH1F (name, name, 10, 0, 10))
31 dump_root (hlist, 'test.root')
32
33This writes a list of histograms to test.root.
34The histograms may be read back like this:
35
36 import ROOT
37 from PyAnalysisUtils.root_pickle import load_root
38 hlist = load_root ('test.root')
39
40
41The following additional notes apply.
42
43 - In addition to dump_root and
44 load_root, the module also provides
45 dump and load functions, which
46 take root file objects instead of file names.
47
48 - The present version of root_pickle will
49 not work correctly for the case of python objects deriving
50 from root objects. It will probably also not work for the
51 case of root objects which do not derive from
52 TObject.
53
54 - When the pickled data are being read, if a class
55 doesn't exist, root_pickle will create a
56 dummy class with no methods and use that. This is
57 different from the standard pickle behavior (where it
58 would be an error), but it simplifies usage in the common
59 case where the class is being used to hold histograms, and
60 its methods are entirely concerned with filling the
61 histograms.
62
63 - When restoring a reference to a root object, the default behavior
64 is to not read the root object itself, but instead to create a proxy.
65 The root object will then be read the first time the proxy is accessed.
66 This can help significantly with time and memory usage if you're
67 only accessing a small fraction of the root objects, but it does
68 mean that you need to keep the root file open. Pass use_proxy=0
69 to disable this behavior.
70"""
71
72import pickle
73import ROOT
74import sys
75
def _getdir():
    """Return the directory that is current in ROOT right now.

    Uses ROOT.TDirectory.CurrentDirectory() when this ROOT build provides
    it, unwrapping the result with load() if it has such a method.
    Otherwise falls back to ROOT.gDirectory.
    """
    if hasattr (ROOT.TDirectory, 'CurrentDirectory'):
        d = ROOT.TDirectory.CurrentDirectory()
        if hasattr (d, 'load'):
            # Handle case of CurrentDirectory() returning an atomic.
            d = d.load()
        # Hand back the directory we just looked up.  Before, this value
        # was computed and then dropped, so the branch had no effect.
        return d
    return ROOT.gDirectory
83
84
def _setdir (d):
    """Make D the current ROOT directory by calling TDirectory.cd on it."""
    change_dir = ROOT.TDirectory.cd
    change_dir (d)
87
88
89#
90# This stuff was originally written in terms of a StringIO stream.
91# But with py3, I couldn't find a better way of getting bytes objects
92# into and out of a TString.
93#
94# Argh! We can't store NULs in TObjStrings.
95# But pickle protocols > 0 are binary protocols, and will get corrupted
96# if we truncate at a NUL.
97# So, when we save the pickle data, make the mappings:
98# 0x00 -> 0xff 0x01
99# 0xff -> 0xff 0xfe
100# ... This may actually be obsolete --- looks like we can have NULs
101# in TObjString now, if we access the TString directly. But retain
102# for compatibility with existing pickles.
103#
104
def __init__ (self):
    """Set up the writer with a fresh, empty buffer."""
    # All of the state is created by reopen(), so construction is
    # nothing more than a reset.
    self.reopen()
108
def write (self, s):
    """Append the bytes in S to the buffer, escaping 0x00 and 0xff.

    S is iterated element by element, and each element is compared
    against integer byte values (so a bytes object works).
    The two values that need protecting are written as two-element
    sequences:
        0x00 -> 0xff 0x01
        0xff -> 0xff 0xfe
    Every other value is appended unchanged.  Returns None.
    """
    out = self._str
    for code in s:
        if code == 0xff:
            # The escape marker itself must be escaped.
            out.Append (0xff)
            out.Append (0xfe)
        elif code == 0x00:
            # NUL is written as an escape pair so it never appears raw.
            out.Append (0xff)
            out.Append (0x01)
        else:
            out.Append (code)
    return
128
def getvalue (self):
    """Return the object currently stored in self._s."""
    current = self._s
    return current
131
def reopen (self):
    """Throw away the current buffer and start a new, empty one."""
    fresh = ROOT.TObjString()
    self._s = fresh
    # write() appends to the result of String(), so keep it at hand.
    self._str = fresh.String()
136
137
def __init__ (self):
    """Set up the reader positioned at the start of an empty buffer."""
    # reopen() installs the buffer and the read position.
    self.reopen()
141
142
def read (self, i):
    """Decode and return up to I bytes, starting at the current position.

    Reading stops when I bytes have been produced or the buffer is
    exhausted; a negative I therefore reads everything that is left.
    Escape pairs are undone on the way out (0xff 0x01 -> 0x00,
    0xff 0xfe -> 0xff).  An escape marker that is the very last
    character is consumed and ends the read.
    The result is a bytes object; self._pos is advanced past what was
    consumed.
    """
    unescape = {0x01: 0x00, 0xfe: 0xff}
    buf = self._str
    end = len(buf)
    decoded = []
    remaining = i
    while remaining != 0 and self._pos < end:
        byte = ord(buf[self._pos])
        if byte == 0xff:
            # Escape marker: the real value is given by the next character.
            self._pos += 1
            if self._pos >= end:
                break
            follower = ord(buf[self._pos])
            byte = unescape.get (follower, follower)
        decoded.append (byte)
        self._pos += 1
        remaining -= 1
    return bytes(decoded)
161
162
def readline (self):
    """Decode and return bytes up to and including the next newline.

    Starts at the current position and stops after a decoded byte equal
    to 10, or at the end of the buffer.  Escape pairs are undone
    (0xff 0x01 -> 0x00, 0xff 0xfe -> 0xff); an escape marker that is
    the very last character is consumed and ends the line.
    The result is a bytes object; self._pos is advanced past what was
    consumed.
    """
    unescape = {0x01: 0x00, 0xfe: 0xff}
    newline = 10
    buf = self._str
    end = len(buf)
    decoded = []
    while self._pos < end:
        byte = ord(buf[self._pos])
        if byte == 0xff:
            # Escape marker: the real value is given by the next character.
            self._pos += 1
            if self._pos >= end:
                break
            follower = ord(buf[self._pos])
            byte = unescape.get (follower, follower)
        decoded.append (byte)
        self._pos += 1
        if byte == newline:
            break
    return bytes(decoded)
182
183
def setvalue (self, s):
    """Install S as the buffer to read from and rewind to its start.

    S must provide a String() method; its result is what read() and
    readline() index into.
    """
    self._s = s
    self._str = s.String()
    # Always begin reading a newly installed buffer from the beginning.
    self._pos = 0
189
190
def reopen (self):
    """Replace the current buffer with a new, empty TObjString."""
    empty = ROOT.TObjString()
    self.setvalue (empty)
194
195
def __init__ (self, file, proto=0):
    """Create a root pickler.

    FILE should be a Root TFile.  PROTO is the python pickle protocol
    version to use.  The python part will be pickled to a Root
    TObjString called _pickle; it will contain references to the
    Root objects.
    """
    self.__file = file
    self.__keys = file.GetListOfKeys()
    # NOTE(review): self.__io is read on the next statement (and again in
    # dump()), but no assignment to it is visible here; presumably the
    # output wrapper is created at this point --- confirm against the
    # original file.
    pickler = pickle.Pickler (self.__io, proto)
    # Route every object through _persistent_id so that Root objects
    # can be written to the file and replaced by a reference.
    pickler.persistent_id = self._persistent_id
    self.__pickle = pickler
    self.__pmap = {}
211
212
def dump (self, o, key=None):
    """Write a pickled representation of O to the open TFile.

    KEY is the name under which the pickle data are written; it
    defaults to '_pickle'.  The directory that was current on entry is
    made current again on exit, even if pickling fails.
    """
    name = '_pickle' if key is None else key
    saved_dir = _getdir()
    try:
        self.__file.cd()
        self.__pickle.dump (o)
        payload = self.__io.getvalue()
        # Start a new buffer before writing, so that the next dump
        # does not append to this one.
        self.__io.reopen()
        payload.Write (name)
        self.__file.Flush()
        self.__pmap.clear()
    finally:
        _setdir (saved_dir)
229
230
def clear_memo (self):
    """Empty the memo of the underlying pickler."""
    memo = self.__pickle.memo
    memo.clear()
235
236
237
def _persistent_id (self, o):
    """Write O to the file if it is a Root object, and return its id.

    A proxy (anything with a _Root_Proxy__obj attribute) is first
    replaced by the object it stands for.  For a ROOT.TObject the
    object is written and the string 'NAME;CYCLE' is returned;
    for anything else the result is None.
    """
    if hasattr (o, '_Root_Proxy__obj'):
        o = o._Root_Proxy__obj()
    if not isinstance (o, ROOT.TObject):
        return None

    # The key lookup is done before the write, not after: the common
    # case is that the name has not been written yet, and then no key
    # object has to be fetched at all.  FindObject is used for this
    # first lookup because it is hashed (GetKey is very slow when the
    # key does not exist), but it is not guaranteed to find the highest
    # cycle, so a hit has to be followed up with GetKey after the write.
    name = o.GetName()
    existing = self.__keys.FindObject (name)
    o.Write()
    if not existing:
        return name + ";1"
    key = self.__file.GetKey (name)
    return "%s;%d" % (name, key.GetCycle())
270
271
272
def __init__ (self):
    """Create an empty saver."""
    size = 65536
    self.__chunksize = size
    # Starting the index at the chunk size makes the first add()
    # allocate the first chunk.
    self.__i = size
    self.__chunks = []
279
def add (self, o):
    """Store a reference to O in the next free slot.

    Slots live in fixed-size lists; a new list is allocated whenever
    the current one is full.
    """
    if self.__i >= self.__chunksize:
        # Current chunk is full (or none exists yet): open a new one.
        self.__chunks.append ([None] * self.__chunksize)
        self.__i = 0
    slot = self.__i
    self.__chunks[-1][slot] = o
    self.__i = slot + 1
287
288
# Hooks installed through compat_hooks(); None means none are active.
_compat_hooks = None

# Module-level Saver.  Unpickler adds the file it reads from and every
# object it loads to it (presumably to keep them referenced --- confirm).
xsave = Saver()
# Only these three attributes exist on a proxy instance.
__slots__ = (
    '__f',
    '__pid',
    '__o',
)

def __init__ (self, f, pid):
    """Remember where to find an object without reading it yet.

    F is the object whose Get method will be called, and PID is the
    name passed to it.  Nothing is read here.
    """
    self.__f = f
    # Interned so that equal names share one string object.
    self.__pid = sys.intern(pid)
    # Filled in on first access.
    self.__o = None
def __getattr__ (self, a):
    """Forward lookup of attribute A to the proxied object.

    The object is read on first use; see __obj.  Raises AttributeError
    for the proxy's own slot names, which can only reach this method
    when the slot has not been set.
    """
    if a.startswith ('_Root_Proxy__'):
        # Reading self.__o below for an unset slot would call this
        # method again, without end.  Fail cleanly instead.
        raise AttributeError (a)
    # Reuse __obj for the lazy read rather than repeating its logic here.
    return getattr (self.__obj(), a)
def __obj (self):
    """Return the proxied object, reading it from the file on first use."""
    if self.__o is not None:
        return self.__o
    loaded = self.__f.Get (self.__pid)
    self.__o = loaded
    # Make the class of the object report 'ROOT' as its module.
    if loaded.__class__.__module__ != 'ROOT':
        loaded.__class__.__module__ = 'ROOT'
    return loaded
class Unpickler (pickle.Unpickler):
    """Unpickler that resolves references to root objects in a Root file.

    NOTE: this is built on pickle, and unpickling can run arbitrary code.
    Only load files that come from a source you trust.
    """

    def __init__ (self, file, use_proxy = True, use_hash = False):
        """Create a root unpickler.

        FILE should be a Root TFile.
        If USE_PROXY is true, references to root objects are restored as
        Root_Proxy instances instead of being read immediately.
        If USE_HASH is true, FILE.Get is replaced by a function that looks
        keys up in a dictionary built from FILE.GetListOfKeys(), falling
        back to the original Get when nothing is found.
        """
        self.__use_proxy = use_proxy
        self.__file = file
        # NOTE(review): self.__io is read on the next statement (and in
        # load()), but no assignment to it is visible in this class;
        # presumably the input wrapper is created at this point ---
        # confirm against the original file.
        pickle.Unpickler.__init__ (self, self.__io)

        self.__n = 0
        xsave.add (file)

        if use_hash:
            htab = {}
            ctab = {}
            for k in file.GetListOfKeys():
                nm = k.GetName()
                cy = k.GetCycle()
                htab[(nm,cy)] = k
                if cy > ctab.get(nm,0):
                    # Highest cycle seen so far for this name; it is
                    # also what a request without a cycle should find.
                    ctab[nm] = cy
                    htab[(nm,9999)] = k
            file._htab = htab
            oget = file.Get
            split = self._split_namecycle
            def xget (nm0):
                nm, cy = split (nm0)
                ret = htab.get ((nm,cy), None)
                if not ret:
                    print ("did't find", nm, cy, len(htab))
                    return oget (nm0)
                return ret.ReadObj()
            file.Get = xget
        return


    @staticmethod
    def _split_namecycle (namecycle):
        """Split 'NAME;CYCLE' into a (name, cycle) pair.

        The split is made at the first ';'.  Without a ';' the cycle is
        9999; a cycle of '*' gives 10000; a cycle that is not an integer
        gives None, so that the lookup misses and the request is passed
        on to the file's own Get.
        """
        name, sep, cycle = namecycle.partition (';')
        if not sep:
            return namecycle, 9999
        if cycle == '*':
            return name, 10000
        try:
            return name, int(cycle)
        except ValueError:
            return name, None


    def load (self, key=None):
        """Read a pickled object representation from the open file.

        KEY is the name of the pickle data and defaults to '_pickle'.
        Each call reads the next cycle of KEY, starting with cycle 1.
        """
        if key is None:
            key = '_pickle'
        o = None
        if _compat_hooks:
            save = _compat_hooks[0]()
        try:
            self.__n += 1
            s = self.__file.Get (key + ';%d' % self.__n)
            self.__io.setvalue (s)
            o = pickle.Unpickler.load(self)
            self.__io.reopen ()
        finally:
            if _compat_hooks:
                # The second hook receives whatever the first returned.
                _compat_hooks[1](save)
        return o


    def persistent_load (self, pid):
        """Turn the persistent id PID back into a root object or a proxy."""
        if self.__use_proxy:
            o = Root_Proxy (self.__file, pid)
        else:
            o = self.__file.Get (pid)
        xsave.add(o)
        return o


    def find_class (self, module, name):
        """Return the class NAME from MODULE, inventing it if necessary.

        The python 2 module names 'copy_reg' and '__builtin__' are mapped
        to 'copyreg' and 'builtins'.  If MODULE cannot be imported, an
        empty stand-in is registered in sys.modules; if it has no
        attribute NAME, an empty class is created, stored in the module
        under that name, and returned.
        """
        if module == 'copy_reg':
            module = 'copyreg'
        elif module == '__builtin__':
            module = 'builtins'
        try:
            try:
                __import__(module)
                mod = sys.modules[module]
            except ImportError:
                print ("Making dummy module %s" % (module))
                class DummyModule:
                    pass
                mod = DummyModule()
                sys.modules[module] = mod
            return getattr(mod, name)
        except AttributeError:
            print ("Making dummy class %s.%s" % (module, name))
            mod = sys.modules[module]
            class Dummy(object):
                pass
            setattr (mod, name, Dummy)
            return Dummy
412
413
414
def compat_hooks (hooks):
    """Install compatibility hooks for loading.

    While hooks are installed, hooks[0] is called before each load and
    hooks[1] after it; hooks[1] receives the value hooks[0] returned.
    This is useful for backwards compatibility in some situations.
    """
    # Stored at module level, where the unpickler looks it up.
    global _compat_hooks
    _compat_hooks = hooks
424
425
def dump (o, f, proto=0, key=None):
    """Dump object O to the Root TFile F.

    PROTO is the pickle protocol and KEY the name to write under; both
    are passed on to Pickler.
    """
    writer = Pickler (f, proto)
    return writer.dump (o, key)
429
def load (f, use_proxy = 1, key=None):
    """Load an object from the Root TFile F.

    USE_PROXY and KEY are passed on to Unpickler.
    """
    reader = Unpickler (f, use_proxy)
    return reader.load (key)
433
def dump_root (o, fname, proto=0, key=None):
    """Dump object O to the Root file named FNAME.

    The file is opened with "RECREATE" and is closed again before
    returning, whether or not the dump succeeded.  PROTO and KEY are
    passed on to dump.
    """
    f = ROOT.TFile (fname , "RECREATE")
    try:
        dump (o, f, proto, key)
    finally:
        # Close the file even if pickling raised.
        f.Close()
    return
440
def load_root (fname, use_proxy = 1, key=None):
    """Load an object from the Root file named FNAME.

    USE_PROXY and KEY are passed on to load.  The file is not closed here.
    """
    source = ROOT.TFile (fname)
    return load (source, use_proxy, key)
444
445
__init__(self, file, proto=0)
find_class(self, module, name)
__init__(self, file, use_proxy=True, use_hash=False)
-event-from-file
load_root(fname, use_proxy=1, key=None)
load(f, use_proxy=1, key=None)
dump_root(o, fname, proto=0, key=None)