ATLAS Offline Software
Control
CxxUtils
Root
FloatCompressor.cxx
Go to the documentation of this file.
1
/*
2
Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
3
*/
4
5
// $Id: FloatCompressor.cxx 789425 2016-12-13 10:50:12Z krasznaa $
6
7
// System include(s):
8
#include <cmath>
9
10
// Local include(s):
11
#include "
CxxUtils/FloatCompressor.h
"
12
13
namespace
CxxUtils
{
14
16
/// Number of fraction (mantissa) bits in an IEEE754 single-precision float.
static constexpr unsigned int NMANTISSA = 23;
17
18
FloatCompressor::FloatCompressor( unsigned int mantissaBits )
   : m_mantissaBits( mantissaBits ), m_mantissaBitmask( 0 ) {

   // Bit layout of an IEEE754 single-precision float, drawn for the
   // example of keeping only 7 bits of the mantissa:
   //
   //   Sign | Exp (8 bits) | Frac (23 bits)
   //    S     EEEEEEEE       FFFFFFLRTTTTTTTTTTTTTTT
   //
   //   S : Sign bit
   //   E : Exponent bits
   //   F : Fraction bits
   //   L : Least significant bit (lsb) at the requested precision
   //   R : Rounding bit
   //   T : Sticky bits (i.e. any bit after lsb + 1)
   //
   // The rounding direction is decided by R alone:
   //
   //   L = 0, R = 0  ->  round down
   //   L = 1, R = 0  ->  round down
   //   L = 0, R = 1  ->  round up
   //   L = 1, R = 1  ->  round up
   //
   // The (L = 0, R = 1) case can differ from the bfloat16 implementation
   // of TensorFlow, which rounds down when all the T bits are zero and
   // rounds up otherwise.
   //
   // Technically the rounding term is half of the lsb. It gets added to
   // the original value as long as the result doesn't overflow, and the
   // undesired bits are masked away afterwards. We never go below
   // 5 bits in the mantissa.

   // Bring the requested bit count into the supported [5, NMANTISSA] range:
   if( m_mantissaBits > NMANTISSA ) {
      m_mantissaBits = NMANTISSA;
   } else if( m_mantissaBits < 5 ) {
      m_mantissaBits = 5;
   }

   // Number of low-order mantissa bits that get thrown away:
   const unsigned int droppedBits = NMANTISSA - m_mantissaBits;

   // Mask keeping everything except the lowest droppedBits bits:
   m_mantissaBitmask = ~( ( 0x1u << droppedBits ) - 0x1u );

   // Rounding term: the highest dropped bit (half of the kept lsb).
   // With no dropped bits there is nothing to round.
   m_rounding = ( droppedBits == 0 ) ? 0u : ( 0x1u << ( droppedBits - 1 ) );

   // Threshold used to guard the rounding addition against overflow.
   // This part is taken from AthenaPoolCnvSvc/Compressor and works the
   // same as long as no fewer than 3 mantissa bits are kept, which the
   // clamping above rules out in any case.
   m_vmax = ( 0x7f7u << 20 ) | ( 0x000fffffu ^ m_rounding );
}
97
98
float
FloatCompressor::reduceFloatPrecision
(
float
value
)
const
{
99
100
// Check if any compression is to be made:
101
if
(
m_mantissaBits
== NMANTISSA ) {
102
return
value
;
103
}
104
105
// Check for NaN, etc:
106
if
( ! std::isfinite(
value
) ) {
107
return
value
;
108
}
109
110
// Create the helper object:
111
floatint_t
fi
;
112
fi
.fvalue =
value
;
113
114
//safety-check if value (omitting the sign-bit) is lower than vmax
115
//(avoid overflow)
116
if
( (
fi
.ivalue & 0
x7fffffff
) <
m_vmax
) {
117
fi
.ivalue +=
m_rounding
;
118
}
119
120
// Do the compression:
121
fi
.ivalue &=
m_mantissaBitmask
;
122
return
fi
.fvalue;
123
}
124
125
}
// namespace CxxUtils
CxxUtils::FloatCompressor::m_mantissaBits
unsigned int m_mantissaBits
Number of mantissa bits to keep.
Definition:
FloatCompressor.h:47
athena.value
value
Definition:
athena.py:124
CxxUtils::FloatCompressor::m_rounding
uint32_t m_rounding
Definition:
FloatCompressor.h:55
lumiFormat.i
int i
Definition:
lumiFormat.py:85
CxxUtils::FloatCompressor::reduceFloatPrecision
float reduceFloatPrecision(float value) const
Function returning a reduced precision float value.
Definition:
FloatCompressor.cxx:98
FloatCompressor.h
CxxUtils
Definition:
aligned_vector.h:29
CxxUtils::FloatCompressor::floatint_t
Type used in the compression.
Definition:
FloatCompressor.h:40
CxxUtils::FloatCompressor::m_vmax
uint32_t m_vmax
Largest possible positive 32bit float minus the rounding.
Definition:
FloatCompressor.h:57
CaloCellPos2Ntuple.x7fffffff
x7fffffff
Definition:
CaloCellPos2Ntuple.py:24
checkFileSG.fi
fi
Definition:
checkFileSG.py:65
CxxUtils::FloatCompressor::FloatCompressor
FloatCompressor(unsigned int mantissaBits=7)
Constructor with the number of mantissa bits to retain.
Definition:
FloatCompressor.cxx:18
CxxUtils::FloatCompressor::m_mantissaBitmask
uint32_t m_mantissaBitmask
Bitmask for zeroing out the non-interesting bits.
Definition:
FloatCompressor.h:49
Generated on Thu Nov 7 2024 21:15:21 for ATLAS Offline Software by
1.8.18