ATLAS Offline Software
root_pickle.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2 
3 #
4 # File: root_pickle.py
5 # Created: sss, 2004.
6 # Purpose: Pickle python data into a root file, preserving references
7 # to root objects.
8 #
9 
10 """Pickle python data into a root file, preserving references to root objects.
11 
12 This module allows pickling python
13 objects into a root file. The python objects may contain
14 references to named root objects. If one has set up a structure
15 of python objects to hold root histograms, this provides a
16 convenient way of saving and restoring your histograms.
17 The pickled python data are stored in an additional string
18 object in the root file; any root objects are stored as usual.
19 (Thus, root files written by root_pickle can be
20 read just like any other root file if you don't care about the
21 python data.)
22 
23 Here's an example of writing a pickle:
24 
25  import ROOT
26  from PyAnalysisUtils.root_pickle import dump_root
27  hlist = []
28  for i in range (10):
29  name = 'h%d' % i
30  hlist.append (ROOT.TH1F (name, name, 10, 0, 10))
31  dump_root (hlist, 'test.root')
32 
33 This writes a list of histograms to test.root.
34 The histograms may be read back like this:
35 
36  import ROOT
37  from PyAnalysisUtils.root_pickle import load_root
38  hlist = load_root ('test.root')
39 
40 
41 The following additional notes apply.
42 
43  - In addition to dump_root and
44  load_root, the module also provides
45  dump and load functions, which
46  take root file objects instead of file names.
47 
48  - The present version of root_pickle will
49  not work correctly for the case of python objects deriving
50  from root objects. It will probably also not work for the
51  case of root objects which do not derive from
52  TObject.
53 
54  - When the pickled data are being read, if a class
55  doesn't exist, root_pickle will create a
56  dummy class with no methods and use that. This is
57  different from the standard pickle behavior (where it
58  would be an error), but it simplifies usage in the common
59  case where the class is being used to hold histograms, and
60  its methods are entirely concerned with filling the
61  histograms.
62 
63  - When restoring a reference to a root object, the default behavior
64  is to not read the root object itself, but instead to create a proxy.
65  The root object will then be read the first time the proxy is accessed.
66  This can help significantly with time and memory usage if you're
67  only accessing a small fraction of the root objects, but it does
68  mean that you need to keep the root file open. Pass use_proxy=0
69  to disable this behavior.
70 """
71 
72 import pickle
73 import ROOT
74 import sys
75 
def _getdir():
    """Return the directory that ROOT currently considers current.

    When this ROOT version provides TDirectory.CurrentDirectory(), its
    result is used; if that result has a load() method (an atomic
    wrapper), it is unwrapped first.  Otherwise ROOT.gDirectory is
    returned.
    """
    if hasattr(ROOT.TDirectory, 'CurrentDirectory'):
        d = ROOT.TDirectory.CurrentDirectory()
        if hasattr(d, 'load'):
            # Handle case of CurrentDirectory() returning an atomic.
            d = d.load()
        # Return the directory we just looked up; it used to be computed
        # and then thrown away in favour of gDirectory.
        return d
    return ROOT.gDirectory
83 
84 
def _setdir(d):
    """Make D the current directory by calling TDirectory.cd on it."""
    change_dir = ROOT.TDirectory.cd
    change_dir(d)
87 
88 
89 #
90 # This stuff was originally written in terms of an stringIO stream.
91 # But with py3, i couldn't find a better way of getting bytes objects
92 # into and out of a TString.
93 #
94 # Argh! We can't store NULs in TObjStrings.
95 # But pickle protocols > 0 are binary protocols, and will get corrupted
96 # if we truncate at a NUL.
97 # So, when we save the pickle data, make the mappings:
98 # 0x00 -> 0xff 0x01
99 # 0xff -> 0xff 0xfe
100 # ... This may actually be obsolete --- looks like we can have NULs
101 # in TObjString now, if we access the TString directly. But retain
102 # for compatibility with existing pickles.
103 #
104 
106  def __init__ (self):
107  self.reopen()
108 
109  def write (self, s):
110  ss = self._str
111  log = []
112  for c in s:
113  code = c
114  if code == 0xff:
115  ss.Append (0xff)
116  ss.Append (0xfe)
117  log.append (0xff)
118  log.append (0xfe)
119  elif code == 0x00:
120  ss.Append (0xff)
121  ss.Append (0x01)
122  log.append (0xff)
123  log.append (0x01)
124  else:
125  ss.Append (code)
126  log.append (code)
127  return
128 
129  def getvalue (self):
130  return self._s
131 
132  def reopen (self):
133  self._s = ROOT.TObjString()
134  self._str = self._s.String()
135  return
136 
137 
139  def __init__ (self):
140  self.reopen()
141 
142 
143  def read (self, i):
144  out = []
145  slen = len(self._str)
146  while i != 0 and self._pos < slen:
147  c = ord(self._str[self._pos])
148  if c == 0xff:
149  self._pos += 1
150  if self._pos >= slen:
151  break
152  c = ord(self._str[self._pos])
153  if c == 0x01:
154  c = 0x00
155  elif c == 0xfe:
156  c = 0xff
157  out.append (c)
158  self._pos += 1
159  i -= 1
160  return bytes(out)
161 
162 
163  def readline (self):
164  out = []
165  slen = len(self._str)
166  while self._pos < slen:
167  c = ord(self._str[self._pos])
168  if c == 0xff:
169  self._pos += 1
170  if self._pos >= slen:
171  break
172  c = ord(self._str[self._pos])
173  if c == 0x01:
174  c = 0x00
175  elif c == 0xfe:
176  c = 0xff
177  out.append (c)
178  self._pos += 1
179  if c == 10:
180  break
181  return bytes(out)
182 
183 
184  def setvalue (self, s):
185  self._s = s
186  self._str = self._s.String()
187  self._pos = 0
188  return
189 
190 
191  def reopen (self):
192  self.setvalue (ROOT.TObjString())
193  return
194 
195 
197  def __init__ (self, file, proto=0):
198  """Create a root pickler.
199 FILE should be a Root TFile. PROTO is the python pickle protocol
200 version to use. The python part will be pickled to a Root
201 TObjString called _pickle; it will contain references to the
202 Root objects.
203 """
204  self.__file = file
205  self.__keys = file.GetListOfKeys()
207  self.__pickle = pickle.Pickler (self.__io, proto)
208  self.__pickle.persistent_id = self._persistent_id
209  self.__pmap = {}
210  return
211 
212 
213  def dump (self, o, key=None):
214  """Write a pickled representation of o to the open TFile."""
215  if key is None:
216  key = '_pickle'
217  directory = _getdir()
218  try:
219  self.__file.cd()
220  self.__pickle.dump (o)
221  s = self.__io.getvalue()
222  self.__io.reopen()
223  s.Write (key)
224  self.__file.Flush()
225  self.__pmap.clear()
226  finally:
227  _setdir (directory)
228  return
229 
230 
231  def clear_memo (self):
232  """Clears the pickler's internal memo."""
233  self.__pickle.memo.clear()
234  return
235 
236 
237 
238  def _persistent_id (self, o):
239  if hasattr (o, '_Root_Proxy__obj'):
240  o = o._Root_Proxy__obj()
241  if (isinstance (o, ROOT.TObject)):
242  # Write the object, and return the resulting NAME;CYCLE.
243  # We used to to this like this:
244  #o.Write()
245  #k = self.__file.GetKey (o.GetName())
246  #pid = "%s;%d" % (k.GetName(), k.GetCycle())
247  # It turns out, though, that destroying the python objects
248  # referencing the TKeys is quite expensive (O(logN) where
249  # N is the total number of pyroot objects?). Although
250  # we want to allow for the case of saving multiple objects
251  # with the same name, the most common case is that the name
252  # has not already been written to the file. So we optimize
253  # for that case, doing the key lookup before we write the
254  # object, not after. (Note further: GetKey() is very slow
255  # if the key does not actually exist, as it does a linear
256  # search of the key list. We use FindObject instead for the
257  # initial lookup, which is a hashed lookup, but it is not
258  # guaranteed to find the highest cycle. So if we do
259  # find an existing key, we need to look up again using GetKey.
260  nm = o.GetName()
261  k = self.__keys.FindObject(nm)
262  o.Write()
263  if k:
264  k = self.__file.GetKey (nm)
265  pid = "%s;%d" % (nm, k.GetCycle())
266  else:
267  pid = nm + ";1"
268  return pid
269  return
270 
271 
272 
class Saver(object):
    """Append-only store that holds on to every object passed to add().

    Objects are kept in a list of fixed-size chunks (65536 slots each); a
    new chunk is allocated whenever the current one is full.  Nothing is
    ever removed.
    """

    def __init__(self):
        self.__chunksize = 65536
        # Start "full" so that the first add() allocates the first chunk.
        self.__i = self.__chunksize
        self.__chunks = []

    def add(self, o):
        """Store O in the next free slot, allocating a chunk if needed."""
        size = self.__chunksize
        if self.__i >= size:
            self.__chunks.append([None] * size)
            self.__i = 0
        slot = self.__i
        self.__chunks[-1][slot] = o
        self.__i = slot + 1
287 
288 
# Pair of callables installed through compat_hooks(); a false value (the
# default None) means that no hooks are run around loading.
_compat_hooks = None
# Module-wide Saver instance.  Unpickler adds the file it reads from and
# every object it restores to it.
xsave=Saver()
    # Instances carry exactly these three (class-private) attributes:
    # the file to read from, the id to read, and the object once read.
    __slots__ = ('__f', '__pid', '__o')
293  def __init__ (self, f, pid):
294  self.__f = f
295  self.__pid = sys.intern(pid)
296  self.__o = None
297  return
298  def __getattr__ (self, a):
299  if self.__o is None:
300  self.__o = self.__f.Get (self.__pid)
301  if self.__o.__class__.__module__ != 'ROOT':
302  self.__o.__class__.__module__ = 'ROOT'
303  return getattr (self.__o, a)
304  def __obj (self):
305  if self.__o is None:
306  self.__o = self.__f.Get (self.__pid)
307  if self.__o.__class__.__module__ != 'ROOT':
308  self.__o.__class__.__module__ = 'ROOT'
309  return self.__o
class Unpickler (pickle.Unpickler):
    """Unpickler that reads its data from, and resolves references in, a TFile.

    The pickle stream is taken from a string object stored in the file.
    Persistent ids found in the stream are "NAME;CYCLE" strings naming
    objects in the same file; they are resolved either to a Root_Proxy
    (read on first access) or to the object itself.
    """

    def __init__ (self, file, use_proxy = True, use_hash = False):
        """Create a root unpickler.

        FILE should be a Root TFile.
        USE_PROXY selects whether references to root objects are restored
        as Root_Proxy instances (true) or read immediately (false).
        If USE_HASH is true, a dictionary of the file's keys is built and
        FILE.Get is replaced by a function that looks keys up in that
        dictionary first, falling back to the original Get on a miss.
        """
        self.__use_proxy = use_proxy
        self.__file = file
        # The input stream has to exist before the base class is
        # initialised with it; load() points it at the stored string.
        self.__io = Read_Wrapper()
        pickle.Unpickler.__init__ (self, self.__io)

        # Number of load() calls so far; used as the cycle to read.
        self.__n = 0
        xsave.add (file)

        if use_hash:
            htab = {}
            ctab = {}
            for k in file.GetListOfKeys():
                nm = k.GetName()
                cy = k.GetCycle()
                htab[(nm,cy)] = k
                # Track the highest cycle seen per name and file that key
                # under the pseudo-cycle 9999 as well.
                if cy > ctab.get(nm,0):
                    ctab[nm] = cy
                    htab[(nm,9999)] = k
            file._htab = htab
            oget = file.Get
            split = self._split_cycle
            def xget (nm0):
                nm, cy = split (nm0)
                ret = htab.get ((nm,cy), None)
                if not ret:
                    print ("did't find", nm, cy, len(htab))
                    return oget (nm0)
                ret = ret.ReadObj()
                return ret
            file.Get = xget
        return


    @staticmethod
    def _split_cycle (pid):
        """Split 'NAME;CYCLE' into the tuple (NAME, CYCLE).

        CYCLE is returned as an int.  A missing or empty cycle gives 9999,
        the pseudo-cycle under which the highest cycle is filed in the
        hash table; '*' gives 10000, as before.  Any other non-numeric
        cycle raises ValueError.

        This fixes two slips in the earlier inline parsing: it cut the last
        character off NAME, and it looked at only the first character of
        CYCLE.
        """
        name, sep, cycle = pid.partition (';')
        if not sep or not cycle:
            return name, 9999
        if cycle == '*':
            return name, 10000
        return name, int(cycle)


    def load (self, key=None):
        """Read a pickled object representation from the open file.

        KEY is the name the pickle was stored under ('_pickle' when None).
        Each call advances an internal counter N and reads cycle N of KEY,
        so successive calls read successive cycles.  If compatibility
        hooks are installed, hooks[0] is called before loading and
        hooks[1] is called afterwards with its result, even if loading
        fails.
        """
        if key is None:
            key = '_pickle'
        o = None
        if _compat_hooks:
            save = _compat_hooks[0]()
        try:
            self.__n += 1
            s = self.__file.Get (key + ';%d' % self.__n)
            self.__io.setvalue (s)
            o = pickle.Unpickler.load(self)
            self.__io.reopen ()
        finally:
            if _compat_hooks:
                save = _compat_hooks[1](save)
        return o

    def persistent_load (self, pid):
        """Resolve the persistent id PID to a proxy or to the object itself.

        Whatever is returned is also added to the module-level xsave.
        """
        if self.__use_proxy:
            o = Root_Proxy (self.__file, pid)
        else:
            o = self.__file.Get (pid)
        xsave.add(o)
        return o


    def find_class (self, module, name):
        """Return the class NAME from MODULE, inventing a dummy if needed.

        The module names 'copy_reg' and '__builtin__' are translated to
        'copyreg' and 'builtins'.  If MODULE cannot be imported, an empty
        stand-in is registered in sys.modules; if the module has no
        attribute NAME, an empty class is created, stored on the module
        under that name, and returned.
        """
        if module == 'copy_reg':
            module = 'copyreg'
        elif module == '__builtin__':
            module = 'builtins'
        try:
            try:
                __import__(module)
                mod = sys.modules[module]
            except ImportError:
                print ("Making dummy module %s" % (module))
                class DummyModule:
                    pass
                mod = DummyModule()
                sys.modules[module] = mod
            klass = getattr(mod, name)
            return klass
        except AttributeError:
            print ("Making dummy class %s.%s" % (module, name))
            mod = sys.modules[module]
            class Dummy(object):
                pass
            setattr (mod, name, Dummy)
            return Dummy
412 
413 
414 
def compat_hooks(hooks):
    """Install the compatibility hooks used while loading.

    HOOKS replaces the module-level hook setting.  When it is set,
    hooks[0] is called before loading and hooks[1] is called after
    loading, with the return value of hooks[0] as its argument.  This is
    useful for backwards compatibility in some situations.
    """
    global _compat_hooks
    _compat_hooks = hooks
424 
425 
def dump(o, f, proto=0, key=None):
    """Dump object O to the Root TFile F.

    PROTO is the pickle protocol passed to the Pickler; KEY is the name
    the pickle is stored under.
    """
    pickler = Pickler(f, proto)
    return pickler.dump(o, key)
429 
def load(f, use_proxy = 1, key=None):
    """Load an object from the Root TFile F.

    USE_PROXY is handed to the Unpickler; KEY is the name the pickle was
    stored under.
    """
    unpickler = Unpickler(f, use_proxy)
    return unpickler.load(key)
433 
def dump_root(o, fname, proto=0, key=None):
    """Dump object O to the Root file named FNAME.

    The file is opened with the "RECREATE" option.  PROTO and KEY are
    passed on to dump().  The file is closed when done, including when
    dumping raises.
    """
    f = ROOT.TFile(fname, "RECREATE")
    try:
        dump(o, f, proto, key)
    finally:
        # Close on the error path too, so a failed dump does not leave the
        # file open.
        f.Close()
440 
def load_root(fname, use_proxy = 1, key=None):
    """Load an object from the Root file named FNAME.

    The file is opened here and is not closed by this function.
    USE_PROXY and KEY are passed on to load().
    """
    root_file = ROOT.TFile(fname)
    return load(root_file, use_proxy, key)
444 
445 
python.root_pickle.Unpickler.__io
__io
Definition: root_pickle.py:317
python.root_pickle.Read_Wrapper.readline
def readline(self)
Definition: root_pickle.py:163
python.root_pickle.Saver.add
def add(self, o)
Definition: root_pickle.py:280
python.root_pickle.load_root
def load_root(fname, use_proxy=1, key=None)
Definition: root_pickle.py:441
python.root_pickle.Saver.__i
__i
Definition: root_pickle.py:276
python.root_pickle.Unpickler.__init__
def __init__(self, file, use_proxy=True, use_hash=False)
Definition: root_pickle.py:311
python.root_pickle.Read_Wrapper.setvalue
def setvalue(self, s)
Definition: root_pickle.py:184
python.root_pickle.Write_Wrapper.write
def write(self, s)
Definition: root_pickle.py:109
python.root_pickle.Pickler.__keys
__keys
Definition: root_pickle.py:205
python.root_pickle.Root_Proxy
Definition: root_pickle.py:291
python.root_pickle.Read_Wrapper.reopen
def reopen(self)
Definition: root_pickle.py:191
python.root_pickle.Pickler.__pickle
__pickle
Definition: root_pickle.py:207
python.root_pickle.Unpickler.__n
__n
Definition: root_pickle.py:320
python.root_pickle.Read_Wrapper._s
_s
Definition: root_pickle.py:185
python.root_pickle.Saver.__chunksize
__chunksize
Definition: root_pickle.py:275
python.root_pickle.Pickler.__file
__file
Definition: root_pickle.py:204
python.root_pickle.dump_root
def dump_root(o, fname, proto=0, key=None)
Definition: root_pickle.py:434
python.root_pickle.Root_Proxy.__pid
__pid
Definition: root_pickle.py:295
python.root_pickle.Read_Wrapper.__init__
def __init__(self)
Definition: root_pickle.py:139
python.root_pickle.Write_Wrapper
Definition: root_pickle.py:105
python.root_pickle.Saver.__init__
def __init__(self)
Definition: root_pickle.py:274
python.root_pickle.Root_Proxy.__init__
def __init__(self, f, pid)
Definition: root_pickle.py:293
python.root_pickle.Unpickler.__use_proxy
__use_proxy
Definition: root_pickle.py:315
python.root_pickle.Pickler.__io
__io
Definition: root_pickle.py:206
python.root_pickle.Root_Proxy.__o
__o
Definition: root_pickle.py:296
python.root_pickle.Pickler._persistent_id
def _persistent_id(self, o)
Definition: root_pickle.py:238
python.root_pickle.Read_Wrapper.read
def read(self, i)
Definition: root_pickle.py:143
python.root_pickle.compat_hooks
def compat_hooks(hooks)
Definition: root_pickle.py:415
python.root_pickle.Unpickler.find_class
def find_class(self, module, name)
Definition: root_pickle.py:387
python.root_pickle.Write_Wrapper.reopen
def reopen(self)
Definition: root_pickle.py:132
python.root_pickle.Read_Wrapper._pos
_pos
Definition: root_pickle.py:187
python.root_pickle.Unpickler.persistent_load
def persistent_load(self, pid)
Definition: root_pickle.py:377
python.root_pickle.Saver.__chunks
__chunks
Definition: root_pickle.py:277
python.root_pickle.Root_Proxy.__getattr__
def __getattr__(self, a)
Definition: root_pickle.py:298
python.root_pickle.Write_Wrapper._s
_s
Definition: root_pickle.py:133
python.root_pickle.Write_Wrapper.getvalue
def getvalue(self)
Definition: root_pickle.py:129
python.root_pickle.Read_Wrapper
Definition: root_pickle.py:138
python.root_pickle._setdir
def _setdir(d)
Definition: root_pickle.py:85
python.root_pickle.Root_Proxy.__obj
def __obj(self)
Definition: root_pickle.py:304
python.root_pickle.dump
def dump(o, f, proto=0, key=None)
Definition: root_pickle.py:426
python.root_pickle.Unpickler.load
def load(self, key=None)
Definition: root_pickle.py:359
python.root_pickle.Pickler.clear_memo
def clear_memo(self)
Definition: root_pickle.py:231
python.root_pickle.Root_Proxy.__f
__f
Definition: root_pickle.py:294
python.root_pickle.Pickler.__pmap
__pmap
Definition: root_pickle.py:209
python.root_pickle.Read_Wrapper._str
_str
Definition: root_pickle.py:186
python.root_pickle.Write_Wrapper._str
_str
Definition: root_pickle.py:134
python.root_pickle.Pickler.__init__
def __init__(self, file, proto=0)
Definition: root_pickle.py:197
VKalVrtAthena::varHolder_detail::clear
void clear(T &var)
Definition: NtupleVars.h:48
python.CaloAddPedShiftConfig.int
int
Definition: CaloAddPedShiftConfig.py:45
python.root_pickle.Pickler
Definition: root_pickle.py:196
python.root_pickle.Unpickler.__file
__file
Definition: root_pickle.py:316
python.root_pickle.Saver
Definition: root_pickle.py:273
calibdata.cd
cd
Definition: calibdata.py:50
pickleTool.object
object
Definition: pickleTool.py:29
python.root_pickle.Pickler.dump
def dump(self, o, key=None)
Definition: root_pickle.py:213
python.root_pickle.load
def load(f, use_proxy=1, key=None)
Definition: root_pickle.py:430
python.root_pickle.Write_Wrapper.__init__
def __init__(self)
Definition: root_pickle.py:106
python.root_pickle.Unpickler
Definition: root_pickle.py:310
python.root_pickle._getdir
def _getdir()
Definition: root_pickle.py:76