ATLAS Offline Software
root_pickle.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 #
4 # File: root_pickle.py
5 # Created: sss, 2004.
6 # Purpose: Pickle python data into a root file, preserving references
7 # to root objects.
8 #
9 
10 """Pickle python data into a root file, preserving references to root objects.
11 
12 This module allows pickling python
13 objects into a root file. The python objects may contain
14 references to named root objects. If one has set up a structure
15 of python objects to hold root histograms, this provides a
16 convenient way of saving and restoring your histograms.
17 The pickled python data are stored in an additional string
18 object in the root file; any root objects are stored as usual.
19 (Thus, root files written by root_pickle can be
20 read just like any other root file if you don't care about the
21 python data.)
22 
23 Here's an example of writing a pickle:
24 
25  import ROOT
26  from PyAnalysisUtils.root_pickle import dump_root
27  hlist = []
28  for i in range (10):
29  name = 'h%d' % i
30  hlist.append (ROOT.TH1F (name, name, 10, 0, 10))
31  dump_root (hlist, 'test.root')
32 
33 This writes a list of histograms to test.root.
34 The histograms may be read back like this:
35 
36  import ROOT
37  from PyAnalysisUtils.root_pickle import load_root
38  hlist = load_root ('test.root')
39 
40 
41 The following additional notes apply.
42 
43  - In addition to dump_root and
44  load_root, the module also provides
45  dump and load functions, which
46  take root file objects instead of file names.
47 
48  - The present version of root_pickle will
49  not work correctly for the case of python objects deriving
50  from root objects. It will probably also not work for the
51  case of root objects which do not derive from
52  TObject.
53 
54  - When the pickled data are being read, if a class
55  doesn't exist, root_pickle will create a
56  dummy class with no methods and use that. This is
57  different from the standard pickle behavior (where it
58  would be an error), but it simplifies usage in the common
59  case where the class is being used to hold histograms, and
60  its methods are entirely concerned with filling the
61  histograms.
62 
63  - When restoring a reference to a root object, the default behavior
64  is to not read the root object itself, but instead to create a proxy.
65  The root object will then be read the first time the proxy is accessed.
66  This can help significantly with time and memory usage if you're
67  only accessing a small fraction of the root objects, but it does
68  mean that you need to keep the root file open. Pass use_proxy=0
69  to disable this behavior.
70 """
71 
72 import pickle
73 import ROOT
74 import sys
75 import six
76 from six.moves import intern
77 
def _getdir():
    """Return the current root directory, for later restoration by _setdir.

    When TDirectory provides CurrentDirectory(), the directory object it
    returns is used (unwrapped via load() if it comes back as an atomic).
    ROOT.gDirectory is only the fallback for ROOT versions without that
    accessor.  Previously the object obtained from CurrentDirectory() was
    computed and then thrown away.
    """
    if hasattr (ROOT.TDirectory, 'CurrentDirectory'):
        d = ROOT.TDirectory.CurrentDirectory()
        if hasattr (d, 'load'):
            # Handle case of CurrentDirectory() returning an atomic.
            d = d.load()
        return d
    return ROOT.gDirectory
85 
86 
def _setdir (d):
    """Make D the current root directory (counterpart of _getdir)."""
    change_dir = ROOT.TDirectory.cd
    change_dir (d)
89 
90 
91 #
92 # This stuff was originally written in terms of an stringIO stream.
93 # But with py3, i couldn't find a better way of getting bytes objects
94 # into and out of a TString.
95 #
96 # Argh! We can't store NULs in TObjStrings.
97 # But pickle protocols > 0 are binary protocols, and will get corrupted
98 # if we truncate at a NUL.
99 # So, when we save the pickle data, make the mappings:
100 # 0x00 -> 0xff 0x01
101 # 0xff -> 0xff 0xfe
102 # ... This may actually be obsolete --- looks like we can have NULs
103 # in TObjString now, if we access the TString directly. But retain
104 # for compatibility with existing pickles.
105 #
106 
107 
108 if six.PY2:
109  from StringIO import StringIO
110  def _protect (s):
111  return s.replace ('\377', '\377\376').replace ('\000', '\377\001')
112  def _restore (s):
113  return s.replace ('\377\001', '\000').replace ('\377\376', '\377')
114 
115 
def __init__ (self):
    """Start with an empty output buffer."""
    # reopen() is what actually creates the buffer.
    self.reopen()
119 
def write (self, s):
    """Escape S with _protect and append it to the buffer."""
    escaped = _protect (s)
    return self.__s.write (escaped)
122 
def getvalue (self):
    """Return everything written so far, wrapped in a new TObjString."""
    buffered = self.__s.getvalue()
    return ROOT.TObjString (buffered)
125 
def reopen (self):
    """Discard the current buffer and start a fresh, empty one."""
    self.__s = StringIO()
129 
130 
def __init__ (self):
    """Start with an empty input buffer."""
    # reopen() is what actually creates the buffer.
    self.reopen()
134 
def read (self, i):
    """Read up to I characters from the buffer."""
    chunk = self.__s.read (i)
    return chunk
137 
def readline (self):
    """Read one line from the buffer."""
    line = self.__s.readline ()
    return line
140 
def setvalue (self, s):
    """Load the buffer from S, taking the escaped text from S.GetName()."""
    # The stored text is escaped (see _protect); undo that before reading.
    raw = _restore (s.GetName())
    self.__s = StringIO (raw)
144 
def reopen (self):
    """Discard the current buffer and start a fresh, empty one."""
    self.__s = StringIO()
148 
149 
150 else:
class Write_Wrapper:
    """File-like sink for pickle output, backed by a root TObjString.

    Bytes are appended to the TString of a TObjString one at a time,
    with 0x00 written as 0xff 0x01 and 0xff written as 0xff 0xfe so
    that the stored string never contains a NUL.
    """

    def __init__ (self):
        self.reopen()

    def write (self, s):
        """Append the bytes S to the current string, escaping 0x00 and 0xff."""
        # Look up the bound Append once rather than once per byte.
        append = self._str.Append
        for code in s:
            if code == 0xff:
                append (0xff)
                append (0xfe)
            elif code == 0x00:
                append (0xff)
                append (0x01)
            else:
                append (code)
        return

    def getvalue (self):
        """Return the TObjString holding everything written so far."""
        return self._s

    def reopen (self):
        """Start a fresh, empty TObjString for subsequent writes."""
        self._s = ROOT.TObjString()
        self._str = self._s.String()
        return
182 
183 
class Read_Wrapper:
    """File-like source for pickle input, backed by a root TObjString.

    Undoes the escaping applied when the data were written:
    0xff 0x01 is read back as 0x00 and 0xff 0xfe as 0xff.
    """

    def __init__ (self):
        self.reopen()


    def _scan (self, count, stop=None):
        """Decode bytes starting at the current position.

        Decoding ends after COUNT decoded bytes (a negative COUNT means
        no limit), after a decoded byte equal to STOP has been consumed,
        or at the end of the string.  A lone 0xff escape marker at the
        very end of the string is consumed and dropped.
        Returns the decoded data as a bytes object.
        """
        data = self._str
        size = len(data)
        pos = self._pos
        out = []
        while count != 0 and pos < size:
            c = ord(data[pos])
            if c == 0xff:
                # Escape marker: the next byte says what was escaped.
                pos += 1
                if pos >= size:
                    break
                c = ord(data[pos])
                if c == 0x01:
                    c = 0x00
                elif c == 0xfe:
                    c = 0xff
            out.append (c)
            pos += 1
            count -= 1
            if c == stop:
                break
        self._pos = pos
        return bytes(out)


    def read (self, i):
        """Read up to I decoded bytes; a negative I reads to the end."""
        return self._scan (i)


    def readline (self):
        """Read decoded bytes up to and including the next newline (10)."""
        return self._scan (-1, 10)


    def setvalue (self, s):
        """Read from the TObjString S, starting at its beginning."""
        self._s = s
        self._str = self._s.String()
        self._pos = 0
        return


    def reopen (self):
        """Reset to an empty TObjString."""
        self.setvalue (ROOT.TObjString())
        return
240 
241 
def __init__ (self, file, proto=0):
    """Create a root pickler.

    FILE should be a Root TFile.  PROTO is the python pickle protocol
    version to use.  The python part will be pickled to a Root
    TObjString called _pickle; it will contain references to the
    Root objects.
    """
    self.__file = file
    self.__keys = file.GetListOfKeys()
    # Output stream the python pickler writes into.  This assignment is
    # absent from the listing even though __io is used just below and in
    # dump(); it is restored here (inferred from the getvalue()/reopen()
    # calls made on it).
    self.__io = Write_Wrapper()
    self.__pickle = pickle.Pickler (self.__io, proto)
    # Root objects are written to the file directly and replaced in the
    # pickle stream by the id returned from _persistent_id.
    self.__pickle.persistent_id = self._persistent_id
    self.__pmap = {}
    return
257 
258 
def dump (self, o, key=None):
    """Write a pickled representation of o to the open TFile.

    The pickle string is written under KEY ('_pickle' when KEY is None).
    The directory that was current on entry is restored on exit, even
    if an exception is raised.
    """
    key = '_pickle' if key is None else key
    saved_dir = _getdir()
    try:
        self.__file.cd()
        self.__pickle.dump (o)
        # Grab the finished string and reset the stream for the next dump.
        payload = self.__io.getvalue()
        self.__io.reopen()
        payload.Write (key)
        self.__file.Flush()
        self.__pmap.clear()
    finally:
        _setdir (saved_dir)
275 
276 
def clear_memo (self):
    """Clears the pickler's internal memo."""
    memo = self.__pickle.memo
    memo.clear()
281 
282 
283 
def _persistent_id (self, o):
    """Persistent-id hook for the python pickler.

    For a root TObject (or a Root_Proxy standing in for one), write the
    object to the file and return its "NAME;CYCLE" string.  For anything
    else return None, so the object is pickled normally.
    """
    if hasattr (o, '_Root_Proxy__obj'):
        # Unwrap a proxy to get at the real root object.
        o = o._Root_Proxy__obj()
    if not isinstance (o, ROOT.TObject):
        return None

    # Write the object, and return the resulting NAME;CYCLE.
    # We used to do this by writing first and then asking the file
    # for the key (GetKey) to read back its name and cycle.
    # It turns out, though, that destroying the python objects
    # referencing the TKeys is quite expensive (O(logN) where
    # N is the total number of pyroot objects?).  Although
    # we want to allow for the case of saving multiple objects
    # with the same name, the most common case is that the name
    # has not already been written to the file.  So we optimize
    # for that case, doing the key lookup before we write the
    # object, not after.  (Note further: GetKey() is very slow
    # if the key does not actually exist, as it does a linear
    # search of the key list.  We use FindObject instead for the
    # initial lookup, which is a hashed lookup, but it is not
    # guaranteed to find the highest cycle.  So if we do
    # find an existing key, we need to look up again using GetKey.)
    name = o.GetName()
    existing = self.__keys.FindObject (name)
    o.Write()
    if not existing:
        # First object written under this name.
        return name + ";1"
    latest = self.__file.GetKey (name)
    return "%s;%d" % (name, latest.GetCycle())
316 
317 
318 
class Saver(object):
    """Append-only store of object references.

    References are kept in preallocated lists of __chunksize slots;
    a new list is allocated whenever the current one is full.
    Nothing is ever removed.
    """

    def __init__ (self):
        self.__chunksize = 65536
        # Begin in the "chunk full" state so the first add() allocates.
        self.__i = self.__chunksize
        self.__chunks = []

    def add (self, o):
        """Store a reference to O in the next free slot."""
        slot = self.__i
        if slot >= self.__chunksize:
            self.__chunks.append ([None] * self.__chunksize)
            slot = 0
        self.__chunks[-1][slot] = o
        self.__i = slot + 1
333 
334 
# Pair of callables installed through compat_hooks(); None means no hooks.
_compat_hooks = None

# Module-wide store to which the unpickler adds the files it reads from
# and the objects (or proxies) it returns, so references to them are kept.
xsave = Saver()
338  __slots__ = ('__f', '__pid', '__o')
def __init__ (self, f, pid):
    """Create a proxy for the object stored in file F under id PID.

    Nothing is read here; the object is fetched on first access.
    """
    self.__o = None
    self.__f = f
    # The id string is interned before being stored.
    self.__pid = intern(pid)
def __getattr__ (self, a):
    """Forward lookup of attribute A to the proxied root object.

    The object is read from the file the first time it is needed.
    """
    target = self.__obj()
    return getattr (target, a)
def __obj (self):
    """Return the proxied root object, reading it from the file on first use."""
    if self.__o is not None:
        return self.__o
    loaded = self.__f.Get (self.__pid)
    self.__o = loaded
    # Force the class of the loaded object to report 'ROOT' as its module.
    # NOTE(review): presumably needed so the class can be found again
    # when such objects are pickled -- confirm.
    klass = loaded.__class__
    if klass.__module__ != 'ROOT':
        klass.__module__ = 'ROOT'
    return loaded
class Unpickler (pickle.Unpickler):
    """Unpickler that resolves references to root objects in a TFile."""

    def __init__ (self, file, use_proxy = True, use_hash = False):
        """Create a root unpickler.

        FILE should be a Root TFile.
        If USE_PROXY is true, references to root objects are restored as
        Root_Proxy instances instead of being read immediately.
        If USE_HASH is true, FILE.Get is replaced by a function that looks
        keys up in a dictionary built from the file's key list, falling
        back to the original Get when the key is not found there.
        """
        self.__use_proxy = use_proxy
        self.__file = file
        # Input stream the python unpickler reads from.  This assignment
        # is absent from the listing even though __io is used just below
        # and in load(); it is restored here (inferred from the
        # setvalue()/reopen() calls made on it).
        self.__io = Read_Wrapper()
        pickle.Unpickler.__init__ (self, self.__io)

        self.__n = 0
        xsave.add (file)

        if use_hash:
            htab = {}
            ctab = {}
            for k in file.GetListOfKeys():
                nm = k.GetName()
                cy = k.GetCycle()
                htab[(nm,cy)] = k
                # Track the highest cycle seen per name; it is also
                # registered under the pseudo-cycle 9999.
                if cy > ctab.get(nm,0):
                    ctab[nm] = cy
                    htab[(nm,9999)] = k
            file._htab = htab
            oget = file.Get
            def xget (nm0):
                nm, cy = Unpickler._split_cycle (nm0)
                ret = htab.get ((nm,cy), None)
                if not ret:
                    print ("did't find", nm, cy, len(htab))
                    return oget (nm0)
                ret = ret.ReadObj()
                return ret
            file.Get = xget
        return


    @staticmethod
    def _split_cycle (name):
        """Split 'NAME;CYCLE' into (NAME, CYCLE).

        The cycle is returned as an int.  A name without ';' gets the
        pseudo-cycle 9999 (highest cycle); a cycle of '*' gets 10000.
        Everything after the first ';' is taken as the cycle, so
        multi-digit cycles are handled and the name is left intact.
        """
        base, sep, cycle = name.partition (';')
        if not sep:
            return name, 9999
        if cycle == '*':
            return base, 10000
        return base, int(cycle)


    def load (self, key=None):
        """Read a pickled object representation from the open file.

        The pickle string is read from KEY ('_pickle' when KEY is None).
        Each call on the same unpickler reads the next cycle of KEY
        (KEY;1 on the first call, KEY;2 on the second, ...).
        """
        if key is None:
            key = '_pickle'
        # Snapshot the hooks so the same pair is used before and after.
        hooks = _compat_hooks
        o = None
        if hooks:
            save = hooks[0]()
        try:
            self.__n += 1
            s = self.__file.Get (key + ';%d' % self.__n)
            self.__io.setvalue (s)
            o = pickle.Unpickler.load(self)
            self.__io.reopen ()
        finally:
            if hooks:
                hooks[1](save)
        return o

    def persistent_load (self, pid):
        """Return the root object (or a proxy for it) identified by PID."""
        if self.__use_proxy:
            o = Root_Proxy (self.__file, pid)
        else:
            o = self.__file.Get (pid)
        xsave.add(o)
        return o


    def find_class (self, module, name):
        """Return the class NAME from MODULE, making dummies if needed.

        The python 2 module names 'copy_reg' and '__builtin__' are mapped
        to 'copyreg' and 'builtins'.  If MODULE cannot be imported, an
        empty dummy module object is registered in sys.modules; if the
        module has no attribute NAME, an empty dummy class is created,
        stored in the module under NAME and returned.
        """
        if module == 'copy_reg':
            module = 'copyreg'
        elif module == '__builtin__':
            module = 'builtins'
        try:
            try:
                __import__(module)
                mod = sys.modules[module]
            except ImportError:
                print ("Making dummy module %s" % (module))
                class DummyModule:
                    pass
                mod = DummyModule()
                sys.modules[module] = mod
            return getattr(mod, name)
        except AttributeError:
            print ("Making dummy class %s.%s" % (module, name))
            mod = sys.modules[module]
            class Dummy(object):
                pass
            setattr (mod, name, Dummy)
            return Dummy
458 
459 
460 
def compat_hooks (hooks):
    """Install (or, with a false value, remove) compatibility hooks.

    While hooks are installed, hooks[0] is called with no arguments
    before each load and hooks[1] is called afterwards, receiving
    whatever hooks[0] returned.  This is useful for backwards
    compatibility in some situations.
    """
    global _compat_hooks
    _compat_hooks = hooks
470 
471 
def dump (o, f, proto=0, key=None):
    """Dump object O to the Root TFile F.

    PROTO is the pickle protocol; KEY is the name under which the
    pickle string is written (the Pickler default when None).
    """
    pickler = Pickler (f, proto)
    return pickler.dump (o, key)
475 
def load (f, use_proxy = 1, key=None):
    """Load an object from the Root TFile F.

    With USE_PROXY true, root objects are restored as proxies.
    KEY names the pickle string to read (the Unpickler default when None).
    """
    unpickler = Unpickler (f, use_proxy)
    return unpickler.load (key)
479 
def dump_root (o, fname, proto=0, key=None):
    """Dump object O to the Root file named FNAME.

    The file is opened with "RECREATE" and is closed again before
    returning, including when the dump raises an exception.
    """
    f = ROOT.TFile (fname , "RECREATE")
    try:
        dump (o, f, proto, key)
    finally:
        # Do not leave the file open if pickling fails part-way.
        f.Close()
    return
486 
def load_root (fname, use_proxy = 1, key=None):
    """Load an object from the Root file named FNAME.

    The file is opened here and is not closed by this function; proxies
    returned with USE_PROXY true read their objects from it later.
    """
    f = ROOT.TFile (fname)
    return load (f, use_proxy, key)
490 
491 
python.root_pickle.Unpickler.__io
__io
Definition: root_pickle.py:363
python.root_pickle.Read_Wrapper.readline
def readline(self)
Definition: root_pickle.py:138
python.root_pickle.Saver.add
def add(self, o)
Definition: root_pickle.py:326
python.root_pickle.load_root
def load_root(fname, use_proxy=1, key=None)
Definition: root_pickle.py:487
python.root_pickle.Saver.__i
__i
Definition: root_pickle.py:322
python.root_pickle.Unpickler.__init__
def __init__(self, file, use_proxy=True, use_hash=False)
Definition: root_pickle.py:357
python.root_pickle.Read_Wrapper.__s
__s
Definition: root_pickle.py:142
python.root_pickle.Read_Wrapper.setvalue
def setvalue(self, s)
Definition: root_pickle.py:141
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.root_pickle.Write_Wrapper.write
def write(self, s)
Definition: root_pickle.py:120
python.root_pickle.Pickler.__keys
__keys
Definition: root_pickle.py:251
python.root_pickle.Root_Proxy
Definition: root_pickle.py:337
python.root_pickle.Read_Wrapper.reopen
def reopen(self)
Definition: root_pickle.py:145
python.root_pickle.Pickler.__pickle
__pickle
Definition: root_pickle.py:253
python.root_pickle.Unpickler.__n
__n
Definition: root_pickle.py:366
python.root_pickle.Read_Wrapper._s
_s
Definition: root_pickle.py:231
python.root_pickle.Saver.__chunksize
__chunksize
Definition: root_pickle.py:321
python.root_pickle.Pickler.__file
__file
Definition: root_pickle.py:250
python.root_pickle.dump_root
def dump_root(o, fname, proto=0, key=None)
Definition: root_pickle.py:480
python.root_pickle.Root_Proxy.__pid
__pid
Definition: root_pickle.py:341
python.root_pickle.Read_Wrapper.__init__
def __init__(self)
Definition: root_pickle.py:132
python.root_pickle.Write_Wrapper
Definition: root_pickle.py:116
python.root_pickle.Saver.__init__
def __init__(self)
Definition: root_pickle.py:320
python.root_pickle.Write_Wrapper.__s
__s
Definition: root_pickle.py:127
python.root_pickle.Root_Proxy.__init__
def __init__(self, f, pid)
Definition: root_pickle.py:339
python.root_pickle.Unpickler.__use_proxy
__use_proxy
Definition: root_pickle.py:361
python.root_pickle.Pickler.__io
__io
Definition: root_pickle.py:252
python.root_pickle.Root_Proxy.__o
__o
Definition: root_pickle.py:342
python.root_pickle.Pickler._persistent_id
def _persistent_id(self, o)
Definition: root_pickle.py:284
python.root_pickle.Read_Wrapper.read
def read(self, i)
Definition: root_pickle.py:135
python.root_pickle.compat_hooks
def compat_hooks(hooks)
Definition: root_pickle.py:461
python.root_pickle.Unpickler.find_class
def find_class(self, module, name)
Definition: root_pickle.py:433
python.root_pickle.Write_Wrapper.reopen
def reopen(self)
Definition: root_pickle.py:126
python.root_pickle.Read_Wrapper._pos
_pos
Definition: root_pickle.py:233
python.root_pickle.Unpickler.persistent_load
def persistent_load(self, pid)
Definition: root_pickle.py:423
python.root_pickle.Saver.__chunks
__chunks
Definition: root_pickle.py:323
python.root_pickle._protect
def _protect(s)
Definition: root_pickle.py:110
python.root_pickle.Root_Proxy.__getattr__
def __getattr__(self, a)
Definition: root_pickle.py:344
python.root_pickle.Write_Wrapper._s
_s
Definition: root_pickle.py:179
python.root_pickle.Write_Wrapper.getvalue
def getvalue(self)
Definition: root_pickle.py:123
python.root_pickle.Read_Wrapper
Definition: root_pickle.py:131
python.root_pickle._setdir
def _setdir(d)
Definition: root_pickle.py:87
python.root_pickle.Root_Proxy.__obj
def __obj(self)
Definition: root_pickle.py:350
python.root_pickle.dump
def dump(o, f, proto=0, key=None)
Definition: root_pickle.py:472
python.root_pickle.Unpickler.load
def load(self, key=None)
Definition: root_pickle.py:405
python.root_pickle.Pickler.clear_memo
def clear_memo(self)
Definition: root_pickle.py:277
python.root_pickle.Root_Proxy.__f
__f
Definition: root_pickle.py:340
python.root_pickle.Pickler.__pmap
__pmap
Definition: root_pickle.py:255
python.root_pickle.Read_Wrapper._str
_str
Definition: root_pickle.py:232
python.root_pickle.Write_Wrapper._str
_str
Definition: root_pickle.py:180
python.root_pickle.Pickler.__init__
def __init__(self, file, proto=0)
Definition: root_pickle.py:243
VKalVrtAthena::varHolder_detail::clear
void clear(T &var)
Definition: NtupleVars.h:48
python.root_pickle.Pickler
Definition: root_pickle.py:242
python.root_pickle.Unpickler.__file
__file
Definition: root_pickle.py:362
python.root_pickle.Saver
Definition: root_pickle.py:319
calibdata.cd
cd
Definition: calibdata.py:51
pickleTool.object
object
Definition: pickleTool.py:30
python.root_pickle.Pickler.dump
def dump(self, o, key=None)
Definition: root_pickle.py:259
python.root_pickle.load
def load(f, use_proxy=1, key=None)
Definition: root_pickle.py:476
python.root_pickle._restore
def _restore(s)
Definition: root_pickle.py:112
python.root_pickle.Write_Wrapper.__init__
def __init__(self)
Definition: root_pickle.py:117
python.root_pickle.Unpickler
Definition: root_pickle.py:356
python.root_pickle._getdir
def _getdir()
Definition: root_pickle.py:78