ATLAS Offline Software
Loading...
Searching...
No Matches
FloatingPointHelpers::IEEE754_like< mantiss, exp, tag > Class Template Reference

Specifies a floating point format like those described in IEEE-754, with an adjustable number of bits in the exponent and mantissa. More...

#include <FPHelpers.h>

Collaboration diagram for FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >:

Static Public Member Functions

static constexpr unsigned int total_size_bits ()
static constexpr unsigned int mantissa_size_bits ()
static constexpr unsigned int exponent_size_bits ()
template<class T>
static constexpr T mantissa_mask ()
template<class T>
static constexpr T exponent_mask ()
template<class T>
static constexpr T sign_mask ()
template<class T>
static constexpr T full_mask ()
template<class T>
static constexpr T exponent_bias ()
template<class T>
static constexpr T max_exponent_with_bias ()
template<class T>
static constexpr bool is_infinite (const T pattern)
template<class T>
static constexpr bool is_NaN (const T pattern)
template<class T>
static constexpr T absolute_value (const T pattern)
template<class T>
static constexpr T to_total_ordering (const T pattern)
template<class T>
static constexpr T from_total_ordering (const T pattern)
template<class T>
static constexpr T positive_zero ()
template<class T>
static constexpr T negative_zero ()
template<class T>
static constexpr T positive_infinity ()
template<class T>
static constexpr T negative_infinity ()
template<class T>
static constexpr bool round_results (const bool is_negative, const bool is_odd, const bool is_nearer_to_up, const bool is_tied, RoundingModes rt)
template<class T>
static constexpr T add_patterns (const T a, const T b, const RoundingModes rt=RoundingModes::Default)
 The absolute value of a must be greater than or equal to that of b.
template<class T>
static constexpr T subtract_patterns (const T a, const T b, const RoundingModes rt=RoundingModes::Default)
 The absolute value of a must be greater than or equal to that of b.
template<class T>
static constexpr T add (const T a, const T b, const RoundingModes rt=RoundingModes::Default)
template<class T>
static constexpr T subtract (const T a, const T b, const RoundingModes rt=RoundingModes::Default)

Detailed Description

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
class FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >

Specifies a floating point format like those described in IEEE-754, with an adjustable number of bits in the exponent and mantissa.

The tag parameter merely allows distinct floating point definitions with the same size to coexist, in case there are, e.g., multiple implementations of native/faster operations available. Tag 0 is reserved for the default (i.e. non-native) implementation.

Definition at line 350 of file FPHelpers.h.

Member Function Documentation

◆ absolute_value()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::absolute_value ( const T pattern)
inline static constexpr

Definition at line 435 of file FPHelpers.h.

436 {
437 return pattern & (~sign_mask<T>());
438 }
Specifies a floating point format like those described in IEEE-754, with an adjustable number of bits...
Definition FPHelpers.h:351
static constexpr T sign_mask()
Definition FPHelpers.h:390

◆ add()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::add ( const T a,
const T b,
const RoundingModes rt = RoundingModes::Default )
inline static constexpr
Warning
Very branchy, and thus quite inefficient. The addition/subtraction routines themselves could be branchless, but we must still branch to select between them...

Probably the overhead from all the operations makes it worth it?

Definition at line 631 of file FPHelpers.h.

632 {
633 const T abs_a = absolute_value<T>(a);
634 const T abs_b = absolute_value<T>(b);
635
636 const bool sign_a = a & sign_mask<T>();
637 const bool sign_b = b & sign_mask<T>();
638
639 if (abs_b == 0)
640 {
641 return a;
642 }
643 if (abs_a == 0)
644 {
645 return b;
646 }
647
649 {
650 if (sign_a == sign_b)
651 {
652 return a;
653 }
654 else
655 {
656 return abs_a | (T(1) << (mantissa_size_bits() - 1));
657 //A "quiet" NaN in most platforms.
658 }
659 }
660 else if (is_NaN<T>(a))
661 {
662 return a;
663 }
664 else if (is_NaN<T>(b))
665 {
666 return b;
667 }
668
669 if (sign_a == sign_b)
670 {
671 if (abs_a >= abs_b)
672 {
673 return add_patterns<T>(a, b, rt);
674 }
675 else
676 {
677 return add_patterns<T>(b, a, rt);
678 }
679 }
680 else
681 {
682 if (abs_a > abs_b)
683 {
685 }
686 else if (abs_a == abs_b)
687 {
688 return 0;
689 }
690 else
691 {
693 }
694 }
695 }
static constexpr unsigned int mantissa_size_bits()
Definition FPHelpers.h:360
static constexpr T add_patterns(const T a, const T b, const RoundingModes rt=RoundingModes::Default)
The absolute value of a must be greater than or equal to that of b.
Definition FPHelpers.h:512
static constexpr bool is_NaN(const T pattern)
Definition FPHelpers.h:427
static constexpr T absolute_value(const T pattern)
Definition FPHelpers.h:435
static constexpr bool is_infinite(const T pattern)
Definition FPHelpers.h:421
static constexpr T subtract_patterns(const T a, const T b, const RoundingModes rt=RoundingModes::Default)
The absolute value of a must be greater than or equal to that of b.
Definition FPHelpers.h:571

◆ add_patterns()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::add_patterns ( const T a,
const T b,
const RoundingModes rt = RoundingModes::Default )
inline static constexpr

The absolute value of a must be greater than or equal to that of b.

We also don't handle zero, NaN or infinities here.

Definition at line 512 of file FPHelpers.h.

513 {
514 using namespace OperatorsHelper;
515
516 constexpr unsigned int extra_bits = 2;
517 //One sign and at least one exponent bit, we're safe!
518
519 constexpr T first_not_mantissa_bit = T(1) << mantissa_size_bits();
520
521 const T exp_a = (a & exponent_mask<T>()) >> mantissa_size_bits();
522 const T exp_b = (b & exponent_mask<T>()) >> mantissa_size_bits();
523
524 const bool a_denormal = (exp_a != 0);
525 const bool b_denormal = (exp_b != 0);
526
527 const bool use_second = (exp_a - exp_b) <= mantissa_size_bits() + 1 + extra_bits;
528 const bool is_negative = a & sign_mask<T>();
529
532 //To account for the overflow and rounding.
533
535
537
539
541 constexpr unsigned int desired_number_of_zeros = sizeof(T) * CHAR_BIT - mantissa_size_bits() - 1 - extra_bits;
543
544 const T last_bit_mask = T(1) << (shift_amount + extra_bits);
546 const T round_mask = (last_bit_mask - 1) * !!(last_bit_mask);
549
551
553
555
557
559
561 //If we somehow summed up to infinity,
562 //unset the remaining bits.
563
565 }
static constexpr T exponent_mask()
Definition FPHelpers.h:380
static constexpr T max_exponent_with_bias()
Definition FPHelpers.h:413
static constexpr T mantissa_mask()
Definition FPHelpers.h:371
static constexpr bool round_results(const bool is_negative, const bool is_odd, const bool is_nearer_to_up, const bool is_tied, RoundingModes rt)
Definition FPHelpers.h:485

◆ exponent_bias()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::exponent_bias ( )
inline static constexpr

Definition at line 405 of file FPHelpers.h.

406 {
407 static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
408 "The type must be large enough to hold the bit representation of the floating point." );
409 return (T(1) << (exp - 1)) - 1;
410 }

◆ exponent_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::exponent_mask ( )
inline static constexpr

Definition at line 380 of file FPHelpers.h.

381 {
382 static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
383 "The type must be large enough to hold the bit representation of the floating point." );
384
385 T ret = (T(1) << exp) - 1;
386 return ret << mantiss;
387 }

◆ exponent_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
constexpr unsigned int FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::exponent_size_bits ( )
inline static constexpr

Definition at line 365 of file FPHelpers.h.

366 {
367 return exp;
368 }

◆ from_total_ordering()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::from_total_ordering ( const T pattern)
inline static constexpr
Warning
Even though +0 and -0 should technically compare equal, they are kept as distinct values by this conversion so that the operation remains fully reversible.

Definition at line 454 of file FPHelpers.h.

455 {
456 const T xor_mask = (!(pattern & sign_mask<T>()) * full_mask<T>()) | sign_mask<T>();
457 return pattern ^ xor_mask;
458 }
static constexpr T full_mask()
Definition FPHelpers.h:399

◆ full_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::full_mask ( )
inline static constexpr

Definition at line 399 of file FPHelpers.h.

400 {
402 }

◆ is_infinite()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr bool FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::is_infinite ( const T pattern)
inline static constexpr

Definition at line 421 of file FPHelpers.h.

422 {
423 return (pattern & (~sign_mask<T>())) == exponent_mask<T>();
424 }

◆ is_NaN()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr bool FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::is_NaN ( const T pattern)
inline static constexpr

Definition at line 427 of file FPHelpers.h.

428 {
429 return (pattern & (~sign_mask<T>())) > exponent_mask<T>();
430 //If it also has bits in the mantissa, it's greater than the mask.
431 //Last bit is sign, so signedness of T is of no concern.
432 }

◆ mantissa_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::mantissa_mask ( )
inline static constexpr

Definition at line 371 of file FPHelpers.h.

372 {
373 static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
374 "The type must be large enough to hold the bit representation of the floating point." );
375 T ret = (T(1) << mantiss) - 1;
376 return ret;
377 }

◆ mantissa_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
constexpr unsigned int FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::mantissa_size_bits ( )
inline static constexpr

Definition at line 360 of file FPHelpers.h.

361 {
362 return mantiss;
363 }

◆ max_exponent_with_bias()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::max_exponent_with_bias ( )
inline static constexpr

Definition at line 413 of file FPHelpers.h.

414 {
415 static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
416 "The type must be large enough to hold the bit representation of the floating point." );
417 return exponent_bias<T>() * 2;
418 }
static constexpr T exponent_bias()
Definition FPHelpers.h:405

◆ negative_infinity()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::negative_infinity ( )
inline static constexpr

Definition at line 479 of file FPHelpers.h.

480 {
481 return sign_mask<T>() | exponent_mask<T>();
482 }

◆ negative_zero()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::negative_zero ( )
inline static constexpr

Definition at line 467 of file FPHelpers.h.

468 {
469 return sign_mask<T>();
470 }

◆ positive_infinity()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::positive_infinity ( )
inline static constexpr

Definition at line 473 of file FPHelpers.h.

474 {
475 return exponent_mask<T>();
476 }

◆ positive_zero()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::positive_zero ( )
inline static constexpr

Definition at line 461 of file FPHelpers.h.

462 {
463 return T(0);
464 }

◆ round_results()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr bool FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::round_results ( const bool is_negative,
const bool is_odd,
const bool is_nearer_to_up,
const bool is_tied,
RoundingModes rt )
inline static constexpr

Definition at line 485 of file FPHelpers.h.

488 {
489 switch (rt)
490 {
492 return !is_negative;
494 return is_negative;
496 return 0;
497 //Truncate => do nothing
499 return is_nearer_to_up || (is_odd && is_tied);
501 return is_nearer_to_up || is_tied;
502 default:
503 return 0;
504 }
505 }

◆ sign_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::sign_mask ( )
inline static constexpr

Definition at line 390 of file FPHelpers.h.

391 {
392 static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
393 "The type must be large enough to hold the bit representation of the floating point." );
394 T ret = T(1) << (exp + mantiss);
395 return ret;
396 }

◆ subtract()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::subtract ( const T a,
const T b,
const RoundingModes rt = RoundingModes::Default )
inline static constexpr

Definition at line 699 of file FPHelpers.h.

700 {
701 return add<T>(a, b ^ sign_mask<T>(), rt);
702 }
static constexpr T add(const T a, const T b, const RoundingModes rt=RoundingModes::Default)
Definition FPHelpers.h:631

◆ subtract_patterns()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::subtract_patterns ( const T a,
const T b,
const RoundingModes rt = RoundingModes::Default )
inline static constexpr

The absolute value of a must be greater than or equal to that of b.

We also don't handle zero, NaN or infinities here.

Definition at line 571 of file FPHelpers.h.

572 {
573 using namespace OperatorsHelper;
574
575 constexpr unsigned int extra_bits = 2;
576 //One sign and at least one exponent bit, we're safe!
577
578 constexpr T first_not_mantissa_bit = T(1) << mantissa_size_bits();
579
580 const T exp_a = (a & exponent_mask<T>()) >> mantissa_size_bits();
581 const T exp_b = (b & exponent_mask<T>()) >> mantissa_size_bits();
582
583 const bool use_second = (exp_a - exp_b) <= mantissa_size_bits() + 1 + extra_bits;
584 const bool is_negative = a & sign_mask<T>();
585
586 const T mantiss_a = ((a & mantissa_mask<T>()) | (first_not_mantissa_bit * (exp_a != 0))) << extra_bits;
587 const T mantiss_b = ((b & mantissa_mask<T>()) | (first_not_mantissa_bit * (exp_b != 0))) << extra_bits;
588 //To account for the overflow and rounding.
589
591
593
595
597 constexpr unsigned int desired_number_of_zeros = sizeof(T) * CHAR_BIT - mantissa_size_bits() - 1 - extra_bits;
599
600 const T last_bit_mask = T(1) << extra_bits;
602 const T round_mask = (last_bit_mask - 1) * !!(last_bit_mask);
605
607
609
611
613
615
617
619
621 }

◆ to_total_ordering()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T>
constexpr T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::to_total_ordering ( const T pattern)
inline static constexpr
Warning
Even though +0 and -0 should technically compare equal, they are kept as distinct values by this conversion so that the operation remains fully reversible.

Definition at line 444 of file FPHelpers.h.

445 {
446 const T xor_mask = (!!(pattern & sign_mask<T>()) * full_mask<T>()) | sign_mask<T>();
447 return pattern ^ xor_mask;
448 }

◆ total_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
constexpr unsigned int FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::total_size_bits ( )
inline static constexpr

Definition at line 355 of file FPHelpers.h.

356 {
357 return mantiss + exp + 1;
358 }

The documentation for this class was generated from the following file: