ATLAS Offline Software
FloatCompressor.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 // $Id: FloatCompressor.cxx 789425 2016-12-13 10:50:12Z krasznaa $
6 
7 // System include(s):
8 #include <cmath>
9 
10 // Local include(s):
12 
13 namespace CxxUtils {
14 
// Number of fraction (mantissa) bits in an IEEE754 single-precision float;
// used as the upper bound on the number of mantissa bits that can be kept.
16  static const unsigned int NMANTISSA = 23;
17 
18  FloatCompressor::FloatCompressor( unsigned int mantissaBits )
19  : m_mantissaBits( mantissaBits ), m_mantissaBitmask( 0 ) {
20 
21  // IEEE754 single-precision float
22  // SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM
23  // F F 8 0 0 0 7 F
24 
25  // Definition:
26  //
27  // Assume that we'd like to keep only 7 bits in the mantissa
28  // In this case the memory layout of the bits will be:
29  //
30  // Sign | Exp (8 bits) | Frac (23 bits)
31  // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
32  //
33  // where
34  //
35  // S : Sign bit
36  // E : Exponent bits
37  // F : Fraction bits
38  // L : Least significant bit (lsb) for 7 bits mantissa precision
39  // R : Rounding bit
40  // T : Sticky bits (i.e any bit after lsb + 1)
41  //
42  // In the current implementation there are essentially 4 cases:
43  //
44  // Case 1: L = 0 and R = 0
45  // In this case there'll be rounding down
46  //
47  // Case 2: L = 1 and R = 0
48  // In this case there'll be rounding down
49  //
50  // Case 3: L = 0 and R = 1
51  // In this case there'll be rounding up
52  //
53  // Note: This scenario can be different than bfloat16 implementation
54  // of TensorFlow, where they round down if all the Ts are zero.
55  // Otherwise, they also round up.
56  //
57  // Case 4: L = 1 and R = 1
58  // In this case there'll be rounding up
59  //
60  // In all cases, we do an extra check to avoid overflow.
61  //
62  // From a technical point of view, the rounding is computed
63  // to be the half of the lsb(=1) and added to the original value
64  // as long as the new value doesn't overflow. Then the
65  // undesired bits are masked. We never go below 5 bits in the
66  // mantissa.
67 
68  // Adjust the received bit number to some reasonable value:
69  if( m_mantissaBits < 5 ) {
70  m_mantissaBits = 5;
71  }
72  if( m_mantissaBits > NMANTISSA ) {
73  m_mantissaBits = NMANTISSA;
74  }
75 
76  // Fill up the lower N bits:
77  for( unsigned int i = 0; i < ( NMANTISSA - m_mantissaBits ); ++i ) {
78  m_mantissaBitmask |= ( 0x1 << i );
79  }
80  // And now negate it to get the correct mask:
82 
83  // Set the Magic numbers
84  if (m_mantissaBits == NMANTISSA) {
85  m_rounding = 0;
86  }
87  else {
88  m_rounding = 0x1 << ( 32 - (1 + 8 + m_mantissaBits) - 1 );
89  }
90  // The part below is taken from AthenaPoolCnvSvc/Compressor
91  // and would work the same as long as the user doesn't
92  // compress lower than 3 mantissa bits, which is not allowed
93  // in any case.
94  m_vmax = 0x7f7 << 20;
95  m_vmax |= 0x000fffff xor (m_rounding);
96  }
97 
99 
100  // Check if any compression is to be made:
101  if( m_mantissaBits == NMANTISSA ) {
102  return value;
103  }
104 
105  // Check for NaN, etc:
106  if( ! std::isfinite( value ) ) {
107  return value;
108  }
109 
110  // Create the helper object:
111  floatint_t fi;
112  fi.fvalue = value;
113 
114  //safety-check if value (omitting the sign-bit) is lower than vmax
115  //(avoid overflow)
116  if( ( fi.ivalue & 0x7fffffff ) < m_vmax ) {
117  fi.ivalue += m_rounding;
118  }
119 
120  // Do the compression:
121  fi.ivalue &= m_mantissaBitmask;
122  return fi.fvalue;
123  }
124 
125 } // namespace CxxUtils
CxxUtils::FloatCompressor::m_mantissaBits
unsigned int m_mantissaBits
Number of mantissa bits to keep.
Definition: FloatCompressor.h:47
athena.value
value
Definition: athena.py:122
CxxUtils::FloatCompressor::m_rounding
uint32_t m_rounding
Definition: FloatCompressor.h:55
lumiFormat.i
int i
Definition: lumiFormat.py:92
CxxUtils::FloatCompressor::reduceFloatPrecision
float reduceFloatPrecision(float value) const
Function returning a reduced precision float value.
Definition: FloatCompressor.cxx:98
FloatCompressor.h
CxxUtils
Definition: aligned_vector.h:29
CxxUtils::FloatCompressor::floatint_t
Type used in the compression.
Definition: FloatCompressor.h:40
CxxUtils::FloatCompressor::m_vmax
uint32_t m_vmax
Largest possible positive 32bit float minus the rounding.
Definition: FloatCompressor.h:57
CaloCellPos2Ntuple.x7fffffff
x7fffffff
Definition: CaloCellPos2Ntuple.py:24
checkFileSG.fi
fi
Definition: checkFileSG.py:65
CxxUtils::FloatCompressor::FloatCompressor
FloatCompressor(unsigned int mantissaBits=7)
Constructor with the number of mantissa bits to retain.
Definition: FloatCompressor.cxx:18
CxxUtils::FloatCompressor::m_mantissaBitmask
uint32_t m_mantissaBitmask
Bitmask for zeroing out the non-interesting bits.
Definition: FloatCompressor.h:49