ATLAS Offline Software
Loading...
Searching...
No Matches
FloatPacker.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
10
12#include "CxxUtils/ones.h"
14#include <limits>
15#include <string>
16#include <sstream>
17#include <iomanip>
18#include <stdexcept>
19#ifndef __APPLE__
20#include <ieee754.h> // ??? Is this standardized?
21#else
22//ieee754.h doesn't exist on MacOSX
23union ieee754_double
24{
25 long double d;
26 struct {
27 unsigned int negative:1;
28 unsigned int exponent:11;
29 /* Together these comprise the mantissa. */
30 unsigned int mantissa0:20;
31 unsigned int mantissa1:32;
32 } ieee;
33 struct {
34 unsigned int negative:1;
35 unsigned int exponent:11;
36 // cppcheck-suppress unusedStructMember
37 unsigned int quiet_nan:1;
38 /* Together these comprise the mantissa. */
39 unsigned int mantissa0:19;
40 unsigned int mantissa1:32;
41 } ieee_nan;
42};
43
44#define IEEE754_DOUBLE_BIAS 0x3ff /* Added to exponent. */
45
46#endif
47#include <stdint.h>
48
49
50namespace {
51
52
53
58union double_or_int {
59 ieee754_double d;
60 uint32_t i[2];
61};
62
63
64const int ieee754_double_bias = 0x3ff;
65const int ieee754_double_exponent_bits = 11;
66const int ieee754_double_mantissa0_bits = 20;
67const int ieee754_double_mantissa1_bits = 32;
68
69
71typedef CxxUtils::FloatPacker::Packdest Packdest;
72
74const int packdest_bits = std::numeric_limits<Packdest>::digits;
75
76
77// Handy constant: A Packdest with 1 in the high bit.
78const Packdest high_packdest_bit = (1U << (packdest_bits - 1));
79
80const Packdest ieee754_double_exponent_mask =
81 (1U << ieee754_double_exponent_bits) - 1;
82
83
90inline
91int max_int (int nbits)
92{
93 return ((1U << nbits) >> 1) - 1;
94}
95
96
103inline
104int min_int (int nbits)
105{
106 return static_cast<int>(~0U << nbits) >> 1;
107}
108
109
118void renormalize_denormal (int& exponent,
119 Packdest& mantissa)
120{
121 if (mantissa == 0)
122 exponent -= packdest_bits; // Lost precision here.
123 else {
124 while ((mantissa & high_packdest_bit) == 0) {
125 --exponent;
126 mantissa <<= 1;
127 }
128 mantissa <<= 1;
129 }
130}
131
132
146void underflow_to_denormal (int min_exp,
147 int round_bits,
148 int& exponent,
149 Packdest& mantissa)
150{
151 if (exponent <= min_exp) {
152 Packdest mantissa_in = mantissa;
153
154 // Denormalize the mantissa.
155 mantissa = (mantissa >> 1) | high_packdest_bit;
156
157 // Now shift it right.
158 int shift = min_exp - exponent;
159 if (shift < packdest_bits)
160 mantissa >>= shift;
161 else
162 mantissa = 0; // underflow to 0.
163
164 // Flag it as denormal.
165 exponent = min_exp;
166
167 // Handle rounding, if desired.
168 if (round_bits) {
169 //
170 // +- packdest_bits - round_bits - 1
171 // |- round_bits -|v
172 // mantissa: .................X....
173 // v- shift+1 -^
174 // mantissa_in: .....X................
175 // ^
176 // +- packdest_bits - round_bits + shift
177 //
178 int orig_pos = packdest_bits - round_bits + shift;
179 if (orig_pos < packdest_bits &&
180 ((static_cast<Packdest> (1) << orig_pos) & mantissa_in) != 0)
181 {
182 Packdest lsb = (static_cast<Packdest> (1) <<
183 (packdest_bits - round_bits));
184 Packdest lsbmask = ~ (lsb - 1);
185
186 // ??? If we overflow here, it means we have to go back
187 // to a normalized representation. Just punt for now.
188 if ((mantissa & lsbmask) != lsbmask)
189 mantissa += lsb;
190 }
191 }
192 }
193}
194
195
196} // unnamed namespace
197
198
199namespace CxxUtils {
200
201
213 int nmantissa,
214 double scale /*= 1*/,
215 bool is_signed /*= true*/,
216 bool round /*= false*/)
217 : m_nmantissa (nmantissa),
218 m_scale (scale),
219 m_is_signed (is_signed),
220 m_round (round)
221{
222 // scale==0 means not to scale.
223 // Use that instead of 1 since it's faster to test for 0.
224 if (scale == 1 || scale == 0)
225 m_invscale = 0;
226 else {
227 // Avoid spurious div-zero FPEs with clang.
229 m_invscale = 1. / m_scale;
230 }
231
232 // Set up other cached values.
234 if (m_is_signed)
235 --m_npack;
236
238
239 // Sign bit mask.
240 if (m_is_signed)
241 m_signmask = 1U << (nbits - 1);
242 else
243 m_signmask = 0;
244
245 // Number of exponent bits.
246 m_nexp = nbits - m_nmantissa;
248
249 // Minimum exponent value.
250 m_min_exp = min_int (m_nexp);
251
252 // Maximum exponent value.
253 m_max_exp = max_int (m_nexp);
254
255 if (m_npack < 1 || m_npack > nbits)
256 throw std::runtime_error ("Bad number of mantissa bits.");
257}
258
259
270FloatPacker::pack (double src, std::string* err /*= nullptr*/) const
271{
272 double_or_int d;
273 d.d.d = src;
274
275 // Fast-path for zero. (Purely an optimization.)
276 // Note: can't use a double compare here. On some architectures (eg, MIPS)
277 // a denormal will compare equal to zero.
278 if (d.i[0] == 0 && d.i[1] == 0)
279 return 0;
280
281 // Check for NaN and infinity.
282 if (d.d.ieee.exponent == ieee754_double_exponent_mask) {
283 if (err) {
284 std::ostringstream os;
285 os << "Bad float number: " << src << " (" << std::setbase(16) << d.i[0]
286 << " " << d.i[1] << ")";
287 *err = os.str();
288 }
289 d.d.d = 0;
290 }
291
292 if (m_invscale)
293 d.d.d *= m_invscale;
294
295 bool was_negative = false;
296 if (d.d.ieee.negative != 0) {
297 if (m_is_signed) {
298 was_negative = true;
299 d.d.d = -d.d.d;
300 }
301 else {
302 // Don't complain on -0.
303 if (d.d.d < 0 && err) {
304 std::ostringstream os;
305 os << "Float overflow during packing: " << src;
306 *err = os.str();
307 }
308 d.d.d = 0;
309 }
310 }
311
312 // Check for zero again.
313 // (Also need to preserve the sign; the scale division may
314 // have underflowed.)
315 if (d.i[0] == 0 && d.i[1] == 0) {
316 if (was_negative)
317 return m_signmask;
318 else
319 return 0;
320 }
321
322 // Get packdest_bits bits of mantissa.
323
324 Packdest mantissa =
325 (d.d.ieee.mantissa0 << (packdest_bits -
326 ieee754_double_mantissa0_bits)) |
327 (d.d.ieee.mantissa1 >>
328 (ieee754_double_mantissa1_bits -
329 (packdest_bits - ieee754_double_mantissa0_bits)));
330
331 // Get the unbiased exponent.
332 int exponent =
333 static_cast<int> (d.d.ieee.exponent) - ieee754_double_bias;
334
335 // Do rounding, if requested.
336 if (m_round) {
337 Packdest lsbmask = (1 << (packdest_bits - m_npack));
338 int roundbit;
339 Packdest roundmask;
340 if (lsbmask > 1) {
341 roundbit = (mantissa & (lsbmask >> 1));
342 roundmask = ~ static_cast<Packdest> (roundbit - 1);
343 }
344 else {
345 roundbit = (d.d.ieee.mantissa1 &
346 ((1 << ((ieee754_double_mantissa1_bits -
347 (packdest_bits -
348 ieee754_double_mantissa0_bits)) - 1))));
349 roundmask = ~ static_cast<Packdest> (0);
350 }
351
352 if (roundbit != 0) {
353 // Handle the case where it would overflow.
354 if ((mantissa & roundmask) == roundmask) {
355 mantissa >>= 1;
356 mantissa |= roundmask;
357 exponent += 1;
358 }
359
360 mantissa += lsbmask;
361 }
362 }
363
364 // If the number is too large, bitch, and reset to the largest number.
365 if (exponent > m_max_exp) {
366 if (err) {
367 std::ostringstream os;
368 os << "Float overflow during packing: " << src;
369 *err = os.str();
370 }
371 exponent = m_max_exp;
372 mantissa = static_cast<Packdest> (~0);
373 }
374
375 // Handle denormals. (We've already handled the zero case.)
376 if (exponent == - ieee754_double_bias)
377 renormalize_denormal (exponent, mantissa);
378
379 // If the number is too small, denormalize, or underflow to 0.
380 underflow_to_denormal (m_min_exp, m_round ? m_npack: 0, exponent, mantissa);
381
382 // Pack in the mantissa bits.
383 Packdest dest = mantissa >> (packdest_bits - m_npack);
384
385 // The exponent, if desired.
386 if (m_nexp > 0)
387 dest |= ((exponent - m_min_exp) << m_npack);
388
389 // And the optional sign bit.
390 if (was_negative)
391 dest |= m_signmask;
392
393 return dest;
394}
395
396
404double FloatPacker::unpack (Packdest val, std::string* err /*= nullptr*/) const
405{
406 // Fast-path for 0.
407 if (val == 0)
408 return 0;
409
410 // Break apart the packed value.
411 bool was_negative = false;
412 if ((val & m_signmask) != 0)
413 was_negative = true;
414
415 double d;
416
417 // Fast path for fixed-point representations.
418 if (m_nexp == 0) {
419 Packdest mantissa = (val & m_npack_ones);
420 d = mantissa / ((double)m_npack_ones + 1);
421 if (was_negative)
422 d *= -1;
423 }
424 else {
425 // Get the mantissa.
426 Packdest mantissa = (val & m_npack_ones) << (packdest_bits - m_npack);
427
428 // General case.
429 // Get the exponent.
430 int exponent = ((val >> m_npack) & m_nexp_ones);
431 exponent += m_min_exp; // unbias.
432
433 ieee754_double dd;
434
435 // Handle denormals.
436 if (exponent == m_min_exp) {
437 // Maybe it was -0?
438 if (mantissa == 0) {
439 dd.d = 0;
440 if (was_negative)
441 dd.ieee.negative = 1;
442 return dd.d;
443 }
444
445 renormalize_denormal (exponent, mantissa);
446 }
447
448 // Complain about overflow.
449 if (exponent >= max_int (ieee754_double_exponent_bits)) {
450 if (err) {
451 std::ostringstream os;
452 os << "Overflow while unpacking float; exponent: " << exponent;
453 *err = os.str();
454 }
455 exponent = max_int (ieee754_double_exponent_bits) + 1;
456 mantissa = 0; // Infinity.
457 }
458
459 // Underflow into denormal.
460 underflow_to_denormal ( - ieee754_double_bias, 0,
461 exponent, mantissa);
462
463 // Pack into a double.
464 dd.ieee.negative = was_negative ? 1 : 0;
465 dd.ieee.exponent = exponent + ieee754_double_bias;
466 dd.ieee.mantissa0 =
467 (mantissa >> (packdest_bits - ieee754_double_mantissa0_bits));
468 dd.ieee.mantissa1 =
469 (mantissa << (ieee754_double_mantissa0_bits -
470 (packdest_bits - ieee754_double_mantissa1_bits)));
471 d = dd.d;
472 }
473
474 // Set the result.
475 if (m_scale)
476 d *= m_scale;
477 return d;
478}
479
480
481} // namespace CxxUtils
Pack/unpack floating-point data from/to a given number of bits.
int m_nmantissa
Number of bits in the mantissa + sign bit.
double unpack(Packdest val, std::string *err=nullptr) const
Unpack the value VAL.
Packdest pack(double src, std::string *err=nullptr) const
Pack a value.
Packdest m_npack_ones
Mask with that many low bits set.
int m_min_exp
Minimum exponent value.
int m_nexp
Number of exponent bits.
uint32_t Packdest
Type into which we pack.
Definition FloatPacker.h:61
FloatPacker(int nbits, int nmantissa, double scale=1, bool is_signed=true, bool round=false)
Constructor.
bool m_is_signed
Should we use a sign bit?
double m_invscale
Inverse of scale.
int m_npack
Number of bits in mantissa (exclusive of any sign bit).
int m_max_exp
Maximum exponent value.
Packdest m_nexp_ones
Mask with that many low bits set.
Packdest m_signmask
Mask containing the sign bit (or 0 if there's no sign bit).
bool m_round
Should we round instead of truncating?
double m_scale
Scale factor for stored numbers.
constexpr T ones(unsigned int n)
Return a bit mask with the lower n bits set.
Definition ones.h:25
setEventNumber uint32_t
Construct a bit mask.
Tell the compiler to optimize assuming that FP may trap.
#define CXXUTILS_TRAPPING_FP
Definition trapping_fp.h:24