Loading [MathJax]/extensions/MathMenu.js
ATLAS Offline Software
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
FloatPacker.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 */
11 #include "CxxUtils/FloatPacker.h"
12 #include "CxxUtils/ones.h"
13 #include "CxxUtils/trapping_fp.h"
14 #include <limits>
15 #include <string>
16 #include <sstream>
17 #include <iomanip>
18 #include <stdexcept>
19 #ifndef __APPLE__
20 #include <ieee754.h> // ??? Is this standardized?
21 #else
22 //ieee754.h doesn't exist on MacOSX
23 union ieee754_double
24 {
25  long double d;
26  struct {
27  unsigned int negative:1;
28  unsigned int exponent:11;
29  /* Together these comprise the mantissa. */
30  unsigned int mantissa0:20;
31  unsigned int mantissa1:32;
32  } ieee;
33  struct {
34  unsigned int negative:1;
35  unsigned int exponent:11;
36  // cppcheck-suppress unusedStructMember
37  unsigned int quiet_nan:1;
38  /* Together these comprise the mantissa. */
39  unsigned int mantissa0:19;
40  unsigned int mantissa1:32;
41  } ieee_nan;
42 };
43 
44 #define IEEE754_DOUBLE_BIAS 0x3ff /* Added to exponent. */
45 
46 #endif
47 #include <stdint.h>
48 
49 
50 namespace {
51 
52 
53 
58 union double_or_int {
59  ieee754_double d;
60  uint32_t i[2];
61 };
62 
63 
64 const int ieee754_double_bias = 0x3ff;
65 const int ieee754_double_exponent_bits = 11;
66 const int ieee754_double_mantissa0_bits = 20;
67 const int ieee754_double_mantissa1_bits = 32;
68 
69 
71 typedef CxxUtils::FloatPacker::Packdest Packdest;
72 
74 const int packdest_bits = std::numeric_limits<Packdest>::digits;
75 
76 
77 // Handy constant: A Packdest with 1 in the high bit.
78 const Packdest high_packdest_bit = (1U << (packdest_bits - 1));
79 
80 const Packdest ieee754_double_exponent_mask =
81  (1U << ieee754_double_exponent_bits) - 1;
82 
83 
90 inline
91 int max_int (int nbits)
92 {
93  return ((1U << nbits) >> 1) - 1;
94 }
95 
96 
103 inline
104 int min_int (int nbits)
105 {
106  return static_cast<int>(~0U << nbits) >> 1;
107 }
108 
109 
118 void renormalize_denormal (int& exponent,
119  Packdest& mantissa)
120 {
121  if (mantissa == 0)
122  exponent -= packdest_bits; // Lost precision here.
123  else {
124  while ((mantissa & high_packdest_bit) == 0) {
125  --exponent;
126  mantissa <<= 1;
127  }
128  mantissa <<= 1;
129  }
130 }
131 
132 
146 void underflow_to_denormal (int min_exp,
147  int round_bits,
148  int& exponent,
149  Packdest& mantissa)
150 {
151  if (exponent <= min_exp) {
152  Packdest mantissa_in = mantissa;
153 
154  // Denormalize the mantissa.
155  mantissa = (mantissa >> 1) | high_packdest_bit;
156 
157  // Now shift it right.
158  int shift = min_exp - exponent;
159  if (shift < packdest_bits)
160  mantissa >>= shift;
161  else
162  mantissa = 0; // underflow to 0.
163 
164  // Flag it as denormal.
165  exponent = min_exp;
166 
167  // Handle rounding, if desired.
168  if (round_bits) {
169  //
170  // +- packdest_bits - round_bits - 1
171  // |- round_bits -|v
172  // mantissa: .................X....
173  // v- shift+1 -^
174  // mantissa_in: .....X................
175  // ^
176  // +- packdest_bits - round_bits + shift
177  //
178  int orig_pos = packdest_bits - round_bits + shift;
179  if (orig_pos < packdest_bits &&
180  ((static_cast<Packdest> (1) << orig_pos) & mantissa_in) != 0)
181  {
182  Packdest lsb = (static_cast<Packdest> (1) <<
183  (packdest_bits - round_bits));
184  Packdest lsbmask = ~ (lsb - 1);
185 
186  // ??? If we overflow here, it means we have to go back
187  // to a normalized representation. Just punt for now.
188  if ((mantissa & lsbmask) != lsbmask)
189  mantissa += lsb;
190  }
191  }
192  }
193 }
194 
195 
196 } // unnamed namespace
197 
198 
199 namespace CxxUtils {
200 
201 
213  int nmantissa,
214  double scale /*= 1*/,
215  bool is_signed /*= true*/,
216  bool round /*= false*/)
217  : m_nmantissa (nmantissa),
218  m_scale (scale),
219  m_is_signed (is_signed),
220  m_round (round)
221 {
222  // scale==0 means not to scale.
223  // Use that instead of 1 since it's faster to test for 0.
224  if (scale == 1 || scale == 0)
225  m_invscale = 0;
226  else {
227  // Avoid spurious div-zero FPEs with clang.
229  m_invscale = 1. / m_scale;
230  }
231 
232  // Set up other cached values.
234  if (m_is_signed)
235  --m_npack;
236 
237  m_npack_ones = ones<Packdest> (m_npack);
238 
239  // Sign bit mask.
240  if (m_is_signed)
241  m_signmask = 1U << (nbits - 1);
242  else
243  m_signmask = 0;
244 
245  // Number of exponent bits.
246  m_nexp = nbits - m_nmantissa;
247  m_nexp_ones = ones<Packdest> (m_nexp);
248 
249  // Minimum exponent value.
250  m_min_exp = min_int (m_nexp);
251 
252  // Maximum exponent value.
253  m_max_exp = max_int (m_nexp);
254 
255  if (m_npack < 1 || m_npack > nbits)
256  throw std::runtime_error ("Bad number of mantissa bits.");
257 }
258 
259 
270 FloatPacker::pack (double src, std::string* err /*= nullptr*/) const
271 {
272  double_or_int d;
273  d.d.d = src;
274 
275  // Fast-path for zero. (Purely an optimization.)
276  // Note: can't use a double compare here. On some architectures (eg, MIPS)
277  // a denormal will compare equal to zero.
278  if (d.i[0] == 0 && d.i[1] == 0)
279  return 0;
280 
281  // Check for NaN and infinity.
282  if (d.d.ieee.exponent == ieee754_double_exponent_mask) {
283  if (err) {
284  std::ostringstream os;
285  os << "Bad float number: " << src << " (" << std::setbase(16) << d.i[0]
286  << " " << d.i[1] << ")";
287  *err = os.str();
288  }
289  d.d.d = 0;
290  }
291 
292  if (m_invscale)
293  d.d.d *= m_invscale;
294 
295  bool was_negative = false;
296  if (d.d.ieee.negative != 0) {
297  if (m_is_signed) {
298  was_negative = true;
299  d.d.d = -d.d.d;
300  }
301  else {
302  // Don't complain on -0.
303  if (d.d.d < 0 && err) {
304  std::ostringstream os;
305  os << "Float overflow during packing: " << src;
306  *err = os.str();
307  }
308  d.d.d = 0;
309  }
310  }
311 
312  // Check for zero again.
313  // (Also need to preserve the sign; the scale division may
314  // have underflowed.)
315  if (d.i[0] == 0 && d.i[1] == 0) {
316  if (was_negative)
317  return m_signmask;
318  else
319  return 0;
320  }
321 
322  // Get packdest_bits bits of mantissa.
323 
324  Packdest mantissa =
325  (d.d.ieee.mantissa0 << (packdest_bits -
326  ieee754_double_mantissa0_bits)) |
327  (d.d.ieee.mantissa1 >>
328  (ieee754_double_mantissa1_bits -
329  (packdest_bits - ieee754_double_mantissa0_bits)));
330 
331  // Get the unbiased exponent.
332  int exponent =
333  static_cast<int> (d.d.ieee.exponent) - ieee754_double_bias;
334 
335  // Do rounding, if requested.
336  if (m_round) {
337  Packdest lsbmask = (1 << (packdest_bits - m_npack));
338  int roundbit;
339  Packdest roundmask;
340  if (lsbmask > 1) {
341  roundbit = (mantissa & (lsbmask >> 1));
342  roundmask = ~ static_cast<Packdest> (roundbit - 1);
343  }
344  else {
345  roundbit = (d.d.ieee.mantissa1 &
346  ((1 << ((ieee754_double_mantissa1_bits -
347  (packdest_bits -
348  ieee754_double_mantissa0_bits)) - 1))));
349  roundmask = ~ static_cast<Packdest> (0);
350  }
351 
352  if (roundbit != 0) {
353  // Handle the case where it would overflow.
354  if ((mantissa & roundmask) == roundmask) {
355  mantissa >>= 1;
356  mantissa |= roundmask;
357  exponent += 1;
358  }
359 
360  mantissa += lsbmask;
361  }
362  }
363 
364  // If the number is too large, bitch, and reset to the largest number.
365  if (exponent > m_max_exp) {
366  if (err) {
367  std::ostringstream os;
368  os << "Float overflow during packing: " << src;
369  *err = os.str();
370  }
371  exponent = m_max_exp;
372  mantissa = static_cast<Packdest> (~0);
373  }
374 
375  // Handle denormals. (We've already handled the zero case.)
376  if (exponent == - ieee754_double_bias)
377  renormalize_denormal (exponent, mantissa);
378 
379  // If the number is too small, denormalize, or underflow to 0.
380  underflow_to_denormal (m_min_exp, m_round ? m_npack: 0, exponent, mantissa);
381 
382  // Pack in the mantissa bits.
383  Packdest dest = mantissa >> (packdest_bits - m_npack);
384 
385  // The exponent, if desired.
386  if (m_nexp > 0)
387  dest |= ((exponent - m_min_exp) << m_npack);
388 
389  // And the optional sign bit.
390  if (was_negative)
391  dest |= m_signmask;
392 
393  return dest;
394 }
395 
396 
404 double FloatPacker::unpack (Packdest val, std::string* err /*= nullptr*/) const
405 {
406  // Fast-path for 0.
407  if (val == 0)
408  return 0;
409 
410  // Break apart the packed value.
411  bool was_negative = false;
412  if ((val & m_signmask) != 0)
413  was_negative = true;
414 
415  double d;
416 
417  // Fast path for fixed-point representations.
418  if (m_nexp == 0) {
419  Packdest mantissa = (val & m_npack_ones);
420  d = mantissa / ((double)m_npack_ones + 1);
421  if (was_negative)
422  d *= -1;
423  }
424  else {
425  // Get the mantissa.
426  Packdest mantissa = (val & m_npack_ones) << (packdest_bits - m_npack);
427 
428  // General case.
429  // Get the exponent.
430  int exponent = ((val >> m_npack) & m_nexp_ones);
431  exponent += m_min_exp; // unbias.
432 
433  ieee754_double dd;
434 
435  // Handle denormals.
436  if (exponent == m_min_exp) {
437  // Maybe it was -0?
438  if (mantissa == 0) {
439  dd.d = 0;
440  if (was_negative)
441  dd.ieee.negative = 1;
442  return dd.d;
443  }
444 
445  renormalize_denormal (exponent, mantissa);
446  }
447 
448  // Complain about overflow.
449  if (exponent >= max_int (ieee754_double_exponent_bits)) {
450  if (err) {
451  std::ostringstream os;
452  os << "Overflow while unpacking float; exponent: " << exponent;
453  *err = os.str();
454  }
455  exponent = max_int (ieee754_double_exponent_bits) + 1;
456  mantissa = 0; // Infinity.
457  }
458 
459  // Underflow into denormal.
460  underflow_to_denormal ( - ieee754_double_bias, 0,
461  exponent, mantissa);
462 
463  // Pack into a double.
464  dd.ieee.negative = was_negative ? 1 : 0;
465  dd.ieee.exponent = exponent + ieee754_double_bias;
466  dd.ieee.mantissa0 =
467  (mantissa >> (packdest_bits - ieee754_double_mantissa0_bits));
468  dd.ieee.mantissa1 =
469  (mantissa << (ieee754_double_mantissa0_bits -
470  (packdest_bits - ieee754_double_mantissa1_bits)));
471  d = dd.d;
472  }
473 
474  // Set the result.
475  if (m_scale)
476  d *= m_scale;
477  return d;
478 }
479 
480 
481 } // namespace CxxUtils
CxxUtils::FloatPacker::FloatPacker
FloatPacker(int nbits, int nmantissa, double scale=1, bool is_signed=true, bool round=false)
Constructor.
Definition: FloatPacker.cxx:212
CXXUTILS_TRAPPING_FP
#define CXXUTILS_TRAPPING_FP
Definition: trapping_fp.h:24
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
WriteCellNoiseToCool.src
src
Definition: WriteCellNoiseToCool.py:513
hist_file_dump.d
d
Definition: hist_file_dump.py:143
MuonGM::round
float round(const float toRound, const unsigned int decimals)
Definition: Mdt.cxx:27
CxxUtils::FloatPacker::m_is_signed
bool m_is_signed
Should we use a sign bit?
Definition: FloatPacker.h:114
perfmonmt-printer.dest
dest
Definition: perfmonmt-printer.py:189
yodamerge_tmp.scale
scale
Definition: yodamerge_tmp.py:138
CxxUtils::FloatPacker::m_npack_ones
Packdest m_npack_ones
Mask with that many low bits set.
Definition: FloatPacker.h:123
CxxUtils::FloatPacker::m_signmask
Packdest m_signmask
Mask containing the sign bit (or 0 if there's no sign bit).
Definition: FloatPacker.h:126
CxxUtils::FloatPacker::m_min_exp
int m_min_exp
Minimum exponent value.
Definition: FloatPacker.h:135
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:182
lumiFormat.i
int i
Definition: lumiFormat.py:85
CxxUtils::FloatPacker::m_nexp
int m_nexp
Number of exponent bits.
Definition: FloatPacker.h:129
CxxUtils
Definition: aligned_vector.h:29
CxxUtils::FloatPacker::m_round
bool m_round
Should we round instead of truncating?
Definition: FloatPacker.h:117
CxxUtils::FloatPacker::Packdest
uint32_t Packdest
Type into which we pack.
Definition: FloatPacker.h:61
xAOD::double
double
Definition: CompositeParticle_v1.cxx:159
CxxUtils::FloatPacker::pack
Packdest pack(double src, std::string *err=nullptr) const
Pack a value.
Definition: FloatPacker.cxx:270
ones.h
Construct a bit mask.
ReadFromCoolCompare.os
os
Definition: ReadFromCoolCompare.py:231
trapping_fp.h
Tell the compiler to optimize assuming that FP may trap.
library_scraper.dd
list dd
Definition: library_scraper.py:46
CxxUtils::FloatPacker::m_max_exp
int m_max_exp
Maximum exponent value.
Definition: FloatPacker.h:138
CxxUtils::FloatPacker::m_nmantissa
int m_nmantissa
Number of bits in the mantissa + sign bit.
Definition: FloatPacker.h:105
CxxUtils::FloatPacker::m_nexp_ones
Packdest m_nexp_ones
Mask with that many low bits set.
Definition: FloatPacker.h:132
CxxUtils::FloatPacker::unpack
double unpack(Packdest val, std::string *err=nullptr) const
Unpack the value VAL.
Definition: FloatPacker.cxx:404
Pythia8_RapidityOrderMPI.val
val
Definition: Pythia8_RapidityOrderMPI.py:14
FloatPacker.h
Pack/unpack floating-point data from/to a given number of bits.
CxxUtils::FloatPacker::m_invscale
double m_invscale
Inverse of scale.
Definition: FloatPacker.h:111
CxxUtils::FloatPacker::m_npack
int m_npack
Number of bits in mantissa (exclusive of any sign bit).
Definition: FloatPacker.h:120
CxxUtils::FloatPacker::m_scale
double m_scale
Scale factor for stored numbers.
Definition: FloatPacker.h:108