ATLAS Offline Software
Loading...
Searching...
No Matches
root_pickle.py
Go to the documentation of this file.
1# Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
2
3#
4# File: root_pickle.py
5# Created: sss, 2004.
6# Purpose: Pickle python data into a root file, preserving references
7# to root objects.
8#
9
10"""Pickle python data into a root file, preserving references to root objects.
11
12This module allows pickling python
13objects into a root file. The python objects may contain
14references to named root objects. If one has set up a structure
15of python objects to hold root histograms, this provides a
16convenient way of saving and restoring your histograms.
17The pickled python data are stored in an additional string
18object in the root file; any root objects are stored as usual.
19(Thus, root files written by root_pickle can be
20read just like any other root file if you don't care about the
21python data.)
22
23Here's an example of writing a pickle:
24
25 import ROOT
26 from PyAnalysisUtils.root_pickle import dump_root
27 hlist = []
28 for i in range (10):
29 name = 'h%d' % i
30 hlist.append (ROOT.TH1F (name, name, 10, 0, 10))
31 dump_root (hlist, 'test.root')
32
33This writes a list of histograms to test.root.
34The histograms may be read back like this:
35
36 import ROOT
37 from PyAnalysisUtils.root_pickle import load_root
38 hlist = load_root ('test.root')
39
40
41The following additional notes apply.
42
43 - In addition to dump_root and
44 load_root, the module also provides
45 dump and load functions, which
46 take root file objects instead of file names.
47
48 - The present version of root_pickle will
49 not work correctly for the case of python objects deriving
50 from root objects. It will probably also not work for the
51 case of root objects which do not derive from
52 TObject.
53
54 - When the pickled data are being read, if a class
55 doesn't exist, root_pickle will create a
56 dummy class with no methods and use that. This is
57 different from the standard pickle behavior (where it
58 would be an error), but it simplifies usage in the common
59 case where the class is being used to hold histograms, and
60 its methods are entirely concerned with filling the
61 histograms.
62
63 - When restoring a reference to a root object, the default behavior
64 is to not read the root object itself, but instead to create a proxy.
65 The root object will then be read the first time the proxy is accessed.
66 This can help significantly with time and memory usage if you're
67 only accessing a small fraction of the root objects, but it does
68 mean that you need to keep the root file open. Pass use_proxy=0
69 to disable this behavior.
70"""
71
72import pickle
73import ROOT
74import sys
75
def _getdir():
    """Return the directory object that ROOT currently treats as current.

    As laid out below, the value returned is ROOT.gDirectory, or the result
    of its _resolve() method when it has one.
    """
    # NOTE(review): the listing this block was taken from lost its
    # indentation.  The nesting here assumes the gDirectory assignment is at
    # function level, which leaves the CurrentDirectory() result unused --
    # confirm against the original file.
    if hasattr (ROOT.TDirectory, 'CurrentDirectory'):
        d = ROOT.TDirectory.CurrentDirectory()
        if hasattr (d, 'load'):
            # Handle case of CurrentDirectory() returning an atomic.
            d = d.load()
    d = ROOT.gDirectory
    if hasattr (d, '_resolve'):
        # Handle case of CurrentDirectory() returning TDirectoryPythonAdapter.
        d = d._resolve()
    return d
87
88
def _setdir (d):
    """Call TDirectory.cd on D (used to restore a directory saved by _getdir)."""
    ROOT.TDirectory.cd (d)
91
92
93#
94# This stuff was originally written in terms of an stringIO stream.
95# But with py3, i couldn't find a better way of getting bytes objects
96# into and out of a TString.
97#
98# Argh! We can't store NULs in TObjStrings.
99# But pickle protocols > 0 are binary protocols, and will get corrupted
100# if we truncate at a NUL.
101# So, when we save the pickle data, make the mappings:
102# 0x00 -> 0xff 0x01
103# 0xff -> 0xff 0xfe
104# ... This may actually be obsolete --- it looks like we can have NULs
105# in TObjString now, if we access the TString directly. But retain
106# for compatibility with existing pickles.
107#
108
    def __init__ (self):
        # Start with a fresh, empty output string (see reopen()).
        self.reopen()
112
113 def write (self, s):
114 ss = self._str
115 log = []
116 for c in s:
117 code = c
118 if code == 0xff:
119 ss.Append (0xff)
120 ss.Append (0xfe)
121 log.append (0xff)
122 log.append (0xfe)
123 elif code == 0x00:
124 ss.Append (0xff)
125 ss.Append (0x01)
126 log.append (0xff)
127 log.append (0x01)
128 else:
129 ss.Append (code)
130 log.append (code)
131 return
132
    def getvalue (self):
        """Return the TObjString created by reopen(), whose string write() appends to."""
        return self._s
135
136 def reopen (self):
137 self._s = ROOT.TObjString()
138 self._str = self._s.String()
139 return
140
141
    def __init__ (self):
        # Start positioned at the beginning of an empty string (see reopen()).
        self.reopen()
145
146
147 def read (self, i):
148 out = []
149 slen = len(self._str)
150 while i != 0 and self._pos < slen:
151 c = ord(self._str[self._pos])
152 if c == 0xff:
153 self._pos += 1
154 if self._pos >= slen:
155 break
156 c = ord(self._str[self._pos])
157 if c == 0x01:
158 c = 0x00
159 elif c == 0xfe:
160 c = 0xff
161 out.append (c)
162 self._pos += 1
163 i -= 1
164 return bytes(out)
165
166
167 def readline (self):
168 out = []
169 slen = len(self._str)
170 while self._pos < slen:
171 c = ord(self._str[self._pos])
172 if c == 0xff:
173 self._pos += 1
174 if self._pos >= slen:
175 break
176 c = ord(self._str[self._pos])
177 if c == 0x01:
178 c = 0x00
179 elif c == 0xfe:
180 c = 0xff
181 out.append (c)
182 self._pos += 1
183 if c == 10:
184 break
185 return bytes(out)
186
187
188 def setvalue (self, s):
189 self._s = s
190 self._str = self._s.String()
191 self._pos = 0
192 return
193
194
195 def reopen (self):
196 self.setvalue (ROOT.TObjString())
197 return
198
199
class Pickler(object):
    """Pickle python data into a Root TFile.

    Root objects referenced from the python data are written to the file
    in the usual way and replaced in the pickle by a NAME;CYCLE string.
    """

    def __init__ (self, file, proto=0):
        """Create a root pickler.

        FILE should be a Root TFile.  PROTO is the python pickle protocol
        version to use.  The python part will be pickled to a Root
        TObjString called _pickle; it will contain references to the
        Root objects.
        """
        self.__file = file
        self.__keys = file.GetListOfKeys()
        # NOTE(review): self.__io is used below and in dump(), but the
        # statement assigning it (between the two lines around this note)
        # is absent from the listing this block was taken from.  It must be
        # an object providing write(), getvalue() and reopen() -- restore it
        # from the original file.
        self.__pickle = pickle.Pickler (self.__io, proto)
        self.__pickle.persistent_id = self._persistent_id
        self.__pmap = {}


    def dump (self, o, key=None):
        """Write a pickled representation of o to the open TFile.

        The pickle is written under KEY ('_pickle' if KEY is None).  The
        file is made the current directory while writing; the directory
        that was current beforehand is restored afterwards, even on error.
        """
        key = '_pickle' if key is None else key
        previous = _getdir()
        try:
            self.__file.cd()
            self.__pickle.dump (o)
            payload = self.__io.getvalue()
            # Start a fresh buffer before writing, so the next dump()
            # does not append to this pickle.
            self.__io.reopen()
            payload.Write (key)
            self.__file.Flush()
            self.__pmap.clear()
        finally:
            _setdir (previous)


    def clear_memo (self):
        """Clears the pickler's internal memo."""
        self.__pickle.memo.clear()


    def _persistent_id (self, o):
        """Return a NAME;CYCLE id for Root objects, or None for anything else.

        A Root_Proxy is first replaced by the object it stands for.  If the
        result is a ROOT.TObject it is written to the current directory as
        a side effect, and the returned string identifies what was written.
        """
        if hasattr (o, '_Root_Proxy__obj'):
            o = o._Root_Proxy__obj()
        if not isinstance (o, ROOT.TObject):
            return None

        # Write the object, and return the resulting NAME;CYCLE.
        # This used to write first and then fetch the key to read back its
        # name and cycle.  It turns out, though, that destroying the python
        # objects referencing the TKeys is quite expensive (O(logN) where
        # N is the total number of pyroot objects?).  Although we want to
        # allow for the case of saving multiple objects with the same name,
        # the most common case is that the name has not already been
        # written to the file.  So we optimize for that case, doing the key
        # lookup before we write the object, not after.  (Note further:
        # GetKey() is very slow if the key does not actually exist, as it
        # does a linear search of the key list.  We use FindObject instead
        # for the initial lookup, which is a hashed lookup, but it is not
        # guaranteed to find the highest cycle.  So if we do find an
        # existing key, we need to look up again using GetKey.)
        name = o.GetName()
        existing = self.__keys.FindObject(name)
        o.Write()
        if not existing:
            return name + ";1"
        latest = self.__file.GetKey (name)
        return "%s;%d" % (name, latest.GetCycle())
274
275
276
class Saver(object):
    """Hold on to references to objects, stored in fixed-size chunks.

    Objects passed to add() are stored in preallocated lists of 65536
    slots; a new list is allocated whenever the current one is full.
    Nothing is ever removed.
    """

    def __init__ (self):
        self.__chunksize = 65536
        # Start out "full" so that the first add() allocates a chunk.
        self.__i = self.__chunksize
        self.__chunks = []

    def add (self, o):
        """Store a reference to O in the next free slot."""
        slot = self.__i
        if slot >= self.__chunksize:
            self.__chunks.append ([None] * self.__chunksize)
            slot = 0
        self.__chunks[-1][slot] = o
        self.__i = slot + 1
291
292
# Optional pair of hooks installed by compat_hooks() and consulted by
# Unpickler.load(); while this is None no hooks are called.
_compat_hooks = None
# Module-level Saver to which Unpickler adds the files it reads and the
# objects it restores, so that references to them are retained here.
xsave=Saver()
class Root_Proxy (object):
    """Stand-in for a Root object that is read from its file on first use.

    The proxy remembers the file and the id of the object.  The object
    is fetched with the file's Get method the first time an attribute is
    requested (or __obj is called), and is cached from then on.
    """
    __slots__ = ('__f', '__pid', '__o')

    def __init__ (self, f, pid):
        self.__f = f
        self.__pid = sys.intern(pid)
        self.__o = None

    def __getattr__ (self, a):
        # Only reached for attributes the proxy itself does not have:
        # forward them to the (lazily read) underlying object.
        return getattr (self.__obj(), a)

    def __obj (self):
        """Return the underlying object, reading it from the file if needed."""
        if self.__o is None:
            loaded = self.__f.Get (self.__pid)
            self.__o = loaded
            # NOTE(review): the reason for forcing the class's module to
            # 'ROOT' is not visible here -- presumably so the class is
            # looked up under ROOT; confirm.
            if loaded.__class__.__module__ != 'ROOT':
                loaded.__class__.__module__ = 'ROOT'
        return self.__o
class Unpickler (pickle.Unpickler):
    """Read python data pickled into a Root TFile.

    References to Root objects stored in the pickle are turned back into
    the objects themselves, or into Root_Proxy stand-ins.

    As with any pickle, loading can run code chosen by whoever wrote the
    data, so only read files from sources you trust.
    """

    def __init__ (self, file, use_proxy = True, use_hash = False):
        """Create a root unpickler.

        FILE should be a Root TFile.  If USE_PROXY is true, references to
        Root objects are restored as Root_Proxy instances rather than being
        read immediately.  If USE_HASH is true, FILE.Get is replaced by a
        version that looks keys up in a dictionary built from the file's
        key list.
        """
        self.__use_proxy = use_proxy
        self.__file = file
        # NOTE(review): self.__io is used below and in load(), but the
        # statement assigning it (between the two lines around this note)
        # is absent from the listing this block was taken from.  It must be
        # an object providing read(), readline(), setvalue() and reopen()
        # -- restore it from the original file.
        pickle.Unpickler.__init__ (self, self.__io)

        self.__n = 0
        xsave.add (file)

        if use_hash:
            self._install_hashed_get (file)
        return


    @staticmethod
    def _split_namecycle (namecycle):
        """Split 'NAME;CYCLE' into (NAME, CYCLE).

        A missing cycle gives 9999 and a cycle of '*' gives 10000.  Any
        other cycle must be an integer; ValueError is raised if it is not.
        """
        name, sep, cycle = namecycle.partition (';')
        if not sep:
            return namecycle, 9999
        if cycle == '*':
            return name, 10000
        return name, int (cycle)


    @staticmethod
    def _install_hashed_get (file):
        """Replace FILE.Get with a lookup in a table built from its keys.

        The table maps (name, cycle) to the key, with the highest cycle of
        each name also entered under cycle 9999.  It is stored on the file
        as _htab.  Requests that are not in the table, or whose cycle
        cannot be parsed, are passed to the original Get.
        """
        htab = {}
        ctab = {}
        for k in file.GetListOfKeys():
            nm = k.GetName()
            cy = k.GetCycle()
            htab[(nm,cy)] = k
            if cy > ctab.get(nm,0):
                ctab[nm] = cy
                htab[(nm,9999)] = k
        file._htab = htab
        oget = file.Get
        def xget (nm0):
            try:
                nm, cy = Unpickler._split_namecycle (nm0)
            except ValueError:
                # Not a cycle we understand; let the file deal with it.
                return oget (nm0)
            ret = htab.get ((nm,cy), None)
            if not ret:
                print ("did't find", nm, cy, len(htab))
                return oget (nm0)
            return ret.ReadObj()
        file.Get = xget
        return


    def load (self, key=None):
        """Read a pickled object representation from the open file.

        The pickle is read from KEY ('_pickle' if KEY is None).  Each call
        reads the next cycle of that key, starting from cycle 1.  If
        compatibility hooks are set, the first is called before loading
        and the second, with the first's result, afterwards.
        """
        if key is None:
            key = '_pickle'
        o = None
        if _compat_hooks:
            save = _compat_hooks[0]()
        try:
            self.__n += 1
            s = self.__file.Get (key + ';%d' % self.__n)
            self.__io.setvalue (s)
            o = pickle.Unpickler.load(self)
            self.__io.reopen ()
        finally:
            if _compat_hooks:
                save = _compat_hooks[1](save)
        return o


    def persistent_load (self, pid):
        """Turn the stored id PID back into a Root object (or a proxy for it)."""
        if self.__use_proxy:
            o = Root_Proxy (self.__file, pid)
        else:
            o = self.__file.Get (pid)
        xsave.add(o)
        return o


    def find_class (self, module, name):
        """Return the class NAME from MODULE, inventing one if necessary.

        The python 2 module names copy_reg and __builtin__ are mapped to
        their python 3 equivalents.  If MODULE cannot be imported, an
        empty stand-in is registered in sys.modules; if it then has no
        attribute NAME, an empty class is created, stored in the module
        under that name, and returned.
        """
        if module == 'copy_reg':
            module = 'copyreg'
        elif module == '__builtin__':
            module = 'builtins'
        try:
            try:
                __import__(module)
                mod = sys.modules[module]
            except ImportError:
                print ("Making dummy module %s" % (module))
                class DummyModule:
                    pass
                mod = DummyModule()
                sys.modules[module] = mod
            klass = getattr(mod, name)
            return klass
        except AttributeError:
            print ("Making dummy class %s.%s" % (module, name))
            mod = sys.modules[module]
            class Dummy(object):
                pass
            setattr (mod, name, Dummy)
            return Dummy
416
417
418
def compat_hooks (hooks):
    """Set compatibility hooks.

    HOOKS is stored for use by later loads.  When it is set, hooks[0] is
    called before loading and hooks[1] is called after loading, with the
    return value of hooks[0] as its argument.  This is useful for
    backwards compatibility in some situations.
    """
    global _compat_hooks
    _compat_hooks = hooks
428
429
def dump (o, f, proto=0, key=None):
    """Dump object O to the Root TFile F.

    PROTO is the pickle protocol to use and KEY the name to write the
    pickle under; both are passed on to a new Pickler.
    """
    pickler = Pickler (f, proto)
    return pickler.dump (o, key)
433
def load (f, use_proxy = 1, key=None):
    """Load an object from the Root TFile F.

    USE_PROXY and KEY are passed on to a new Unpickler.
    """
    unpickler = Unpickler (f, use_proxy)
    return unpickler.load (key)
437
def dump_root (o, fname, proto=0, key=None):
    """Dump object O to the Root file named FNAME.

    The file is opened with the "RECREATE" option and is closed again
    before returning, including when pickling raises an exception.
    PROTO and KEY are passed on to dump().
    """
    f = ROOT.TFile (fname , "RECREATE")
    try:
        dump (o, f, proto, key)
    finally:
        # Close even on failure so the file handle is not leaked.
        f.Close()
    return
444
def load_root (fname, use_proxy = 1, key=None):
    """Load an object from the Root file named FNAME.

    The file is opened here and is not closed by this function.
    """
    infile = ROOT.TFile (fname)
    return load (infile, use_proxy, key)
448
449
void clear()
Empty the pool.
__init__(self, file, proto=0)
find_class(self, module, name)
__init__(self, file, use_proxy=True, use_hash=False)
-event-from-file
load_root(fname, use_proxy=1, key=None)
load(f, use_proxy=1, key=None)
dump_root(o, fname, proto=0, key=None)