#include <FPHelpers.h>

Collaboration diagram for FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >:

Static Public Member Functions
constexpr static unsigned int	total_size_bits ()

constexpr static unsigned int	mantissa_size_bits ()

constexpr static unsigned int	exponent_size_bits ()

template<class T >
constexpr static T	mantissa_mask ()

template<class T >
constexpr static T	exponent_mask ()

template<class T >
constexpr static T	sign_mask ()

template<class T >
constexpr static T	full_mask ()

template<class T >
constexpr static T	exponent_bias ()

template<class T >
constexpr static T	max_exponent_with_bias ()

template<class T >
constexpr static bool	is_infinite (const T pattern)

template<class T >
constexpr static bool	is_NaN (const T pattern)

template<class T >
constexpr static T	absolute_value (const T pattern)

template<class T >
constexpr static T	to_total_ordering (const T pattern)

template<class T >
constexpr static T	from_total_ordering (const T pattern)

template<class T >
constexpr static T	positive_zero ()

template<class T >
constexpr static T	negative_zero ()

template<class T >
constexpr static T	positive_infinity ()

template<class T >
constexpr static T	negative_infinity ()

template<class T >
constexpr static bool	round_results (const bool is_negative, const bool is_odd, const bool is_nearer_to_up, const bool is_tied, RoundingModes rt)

template<class T >
constexpr static T	add_patterns (const T a, const T b, const RoundingModes rt=RoundingModes::Default)
	The absolute value of a must be greater than or equal to that of b. More...

template<class T >
constexpr static T	subtract_patterns (const T a, const T b, const RoundingModes rt=RoundingModes::Default)
	The absolute value of a must be greater than or equal to that of b. More...

template<class T >
constexpr static T	add (const T a, const T b, const RoundingModes rt=RoundingModes::Default)

template<class T >
constexpr static T	subtract (const T a, const T b, const RoundingModes rt=RoundingModes::Default)

Detailed Description

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
class FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >

Specifies a floating point format like those described in IEEE-754, with an adjustable number of bits in the exponent and mantissa.

The tag parameter only serves to distinguish floating point definitions that have the same size, e.g. when multiple implementations of native/faster operations are available. Tag 0 is reserved for the default (i.e. non-native) implementation.

Definition at line 350 of file FPHelpers.h.

Member Function Documentation

◆ absolute_value()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::absolute_value ( const T pattern )

inlinestaticconstexpr

Definition at line 435 of file FPHelpers.h.

     {
       return pattern & (~sign_mask<T>());
     }

◆ add()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::add	(	const T	a,
		const T	b,
		const RoundingModes	rt = `RoundingModes::Default`
	)

inlinestaticconstexpr

Warning: Branchy as everything, and thus quite inefficient. The addition/subtraction routines themselves could be branchless, but we must select between them...

Probably the overhead from all the operations makes it worth it?

Definition at line 631 of file FPHelpers.h.

     {
       const T abs_a = absolute_value<T>(a);
       const T abs_b = absolute_value<T>(b);
  
       const bool sign_a = a & sign_mask<T>();
       const bool sign_b = b & sign_mask<T>();
  
       if (abs_b == 0)
         {
           return a;
         }
       if (abs_a == 0)
         {
           return b;
         }
  
       if (is_infinite<T>(a) && is_infinite<T>(b))
         {
           if (sign_a == sign_b)
             {
               return a;
             }
           else
             {
               return abs_a | (T(1) << (mantissa_size_bits() - 1));
               //A "quiet" NaN in most platforms.
             }
         }
       else if (is_NaN<T>(a))
         {
           return a;
         }
       else if (is_NaN<T>(b))
         {
           return b;
         }
  
       if (sign_a == sign_b)
         {
           if (abs_a >= abs_b)
             {
               return add_patterns<T>(a, b, rt);
             }
           else
             {
               return add_patterns<T>(b, a, rt);
             }
         }
       else
         {
           if (abs_a > abs_b)
             {
               return (sign_a * sign_mask<T>()) | subtract_patterns<T>(abs_a, abs_b, rt);
             }
           else if (abs_a == abs_b)
             {
               return 0;
             }
           else
             {
               return (sign_b * sign_mask<T>()) | subtract_patterns<T>(abs_b, abs_a, rt);
             }
         }
     }

◆ add_patterns()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::add_patterns	(	const T	a,
		const T	b,
		const RoundingModes	rt = `RoundingModes::Default`
	)

inlinestaticconstexpr

The absolute value of a must be greater than or equal to that of b.

We also don't handle zero, NaN or infinities here.

Definition at line 512 of file FPHelpers.h.

     {
       //Adds the magnitudes encoded in a and b and returns the resulting bit
       //pattern, which carries the sign of a. The caller is expected to pass
       //the operand with the larger (or equal) magnitude as a; zero, NaN and
       //infinite operands are not handled here.
       using namespace OperatorsHelper;
  
       //Number of low-order guard bits the mantissas are shifted left by,
       //so that rounding information survives the alignment shift.
       constexpr unsigned int extra_bits = 2;
       //One sign and at least one exponent bit, we're safe!
  
       //The bit immediately above the mantissa field (the implicit leading one).
       constexpr T first_not_mantissa_bit = T(1) << mantissa_size_bits();
  
       //Biased exponent fields of both operands.
       const T exp_a = (a & exponent_mask<T>()) >> mantissa_size_bits();
       const T exp_b = (b & exponent_mask<T>()) >> mantissa_size_bits();
  
       //NOTE(review): despite the names, these are true when the exponent
       //field is non-zero, i.e. when the operand is NOT denormal. They gate
       //the insertion of the implicit leading one below.
       const bool a_denormal = (exp_a != 0);
       const bool b_denormal = (exp_b != 0);
  
       //True when the exponent difference is small enough for b to overlap
       //the extended mantissa of a; only used to gate the low bit set below.
       const bool use_second = (exp_a - exp_b) <= mantissa_size_bits() + 1 + extra_bits;
       const bool is_negative = a & sign_mask<T>();
  
       //Mantissas with the implicit bit (if any), widened by the guard bits.
       const T mantiss_a = ((a & mantissa_mask<T>()) | (first_not_mantissa_bit * a_denormal)) << extra_bits;
       const T mantiss_b = ((b & mantissa_mask<T>()) | (first_not_mantissa_bit * b_denormal)) << extra_bits;
       //To account for the overflow and rounding.
  
       T mantiss_ret = mantiss_a;
  
       //Align b to the exponent of a and accumulate.
       //Presumably safe_rshift tolerates shift counts >= the width of T - confirm.
       mantiss_ret += safe_rshift(mantiss_b, exp_a - exp_b);
  
       //ORs a single 0/1 into the lowest bit; presumably meant as a "sticky"
       //marker for bits lost in the alignment shift - TODO confirm.
       mantiss_ret |= !!(safe_lshift(mantiss_b, exp_a - exp_b) & mantissa_mask<T>()) * use_second;
  
       //Fewer leading zeros than desired means the sum carried past the
       //implicit bit and must be shifted right by shift_amount.
       //Presumably clamped_sub saturates at zero - confirm.
       const unsigned int leading_zeros = LeadingZerosPortability::count_leading_zeros<T>(mantiss_ret);
       constexpr unsigned int desired_number_of_zeros = sizeof(T) * CHAR_BIT - mantissa_size_bits() - 1 - extra_bits;
       const unsigned int shift_amount = clamped_sub(desired_number_of_zeros, leading_zeros);
  
       //last_bit_mask: lowest bit that survives the final right shift.
       //last_discarded_bit_mask: highest discarded bit (the "half" position).
       //round_mask: all the bits that will be discarded.
       const T last_bit_mask = T(1) << (shift_amount + extra_bits);
       const T last_discarded_bit_mask = last_bit_mask >> 1;
       const T round_mask = (last_bit_mask - 1) * !!(last_bit_mask);
       //Discarded part strictly above / exactly at one half of the last kept bit.
       const bool round_up = (mantiss_ret & round_mask) > last_discarded_bit_mask;
       const bool tied = last_discarded_bit_mask && ((mantiss_ret & round_mask) == last_discarded_bit_mask);
  
       bool round_bit = round_results<T>(is_negative, (mantiss_ret & last_bit_mask), round_up, tied, rt) && !!last_bit_mask;
  
       //Drop the guard bits (and the carry position, if any), then round.
       mantiss_ret = safe_rshift(mantiss_ret, shift_amount + extra_bits);
  
       mantiss_ret += round_bit * (shift_amount + extra_bits <= sizeof(T) * CHAR_BIT);
  
       //The exponent grows by the carry shift; an operand with a zero
       //exponent field is additionally bumped by one when the sum no longer
       //fits in the mantissa field.
       //NOTE(review): a carry produced by round_bit when exp_a != 0 does not
       //appear to increase the exponent - verify.
       const T exponent_ret = exp_a + shift_amount + (exp_a == 0 &&  mantiss_ret > mantissa_mask<T>());
  
       //Strip the implicit bit.
       mantiss_ret &= mantissa_mask<T>();
  
       mantiss_ret &= ~( ( exponent_ret > max_exponent_with_bias<T>() ) * mantissa_mask<T>() );
       //If we somehow summed up to infinity,
       //unset the remaining bits.
  
       //Reassemble sign, exponent and mantissa fields.
       return (is_negative * sign_mask<T>()) | (exponent_ret << mantissa_size_bits()) | mantiss_ret;
     }

◆ exponent_bias()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::exponent_bias ( )

inlinestaticconstexpr

Definition at line 405 of file FPHelpers.h.

     {
       static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
                     "The type must be large enough to hold the bit representation of the floating point." );
       return (T(1) << (exp - 1)) - 1;
     }

◆ exponent_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::exponent_mask ( )

inlinestaticconstexpr

Definition at line 380 of file FPHelpers.h.

     {
       static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
                     "The type must be large enough to hold the bit representation of the floating point." );
  
       T ret = (T(1) << exp) - 1;
       return ret << mantiss;
     }

◆ exponent_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

constexpr static unsigned int FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::exponent_size_bits ( )

inlinestaticconstexpr

Definition at line 365 of file FPHelpers.h.

     {
       return exp;
     }

◆ from_total_ordering()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::from_total_ordering ( const T pattern )

inlinestaticconstexpr

Warning: Even though +0 and -0 should technically compare equal, we will convert between them too so the operation becomes fully reversible.

Definition at line 454 of file FPHelpers.h.

     {
       const T xor_mask = (!(pattern & sign_mask<T>()) * full_mask<T>()) | sign_mask<T>();
       return pattern ^ xor_mask;
     }

◆ full_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::full_mask ( )

inlinestaticconstexpr

Definition at line 399 of file FPHelpers.h.

     {
       return mantissa_mask<T>() | exponent_mask<T>() | sign_mask<T>();
     }

◆ is_infinite()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static bool FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::is_infinite ( const T pattern )

inlinestaticconstexpr

Definition at line 421 of file FPHelpers.h.

     {
       return (pattern & (~sign_mask<T>())) == exponent_mask<T>();
     }

◆ is_NaN()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static bool FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::is_NaN ( const T pattern )

inlinestaticconstexpr

Definition at line 427 of file FPHelpers.h.

     {
       return (pattern & (~sign_mask<T>())) > exponent_mask<T>();
       //If it also has bits in the mantissa, it's greater than the mask.
       //Last bit is sign, so signedness of T is of no concern.
     }

◆ mantissa_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::mantissa_mask ( )

inlinestaticconstexpr

Definition at line 371 of file FPHelpers.h.

     {
       static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
                     "The type must be large enough to hold the bit representation of the floating point." );
       T ret = (T(1) << mantiss) - 1;
       return ret;
     }

◆ mantissa_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

constexpr static unsigned int FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::mantissa_size_bits ( )

inlinestaticconstexpr

Definition at line 360 of file FPHelpers.h.

     {
       return mantiss;
     }

◆ max_exponent_with_bias()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::max_exponent_with_bias ( )

inlinestaticconstexpr

Definition at line 413 of file FPHelpers.h.

     {
       static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
                     "The type must be large enough to hold the bit representation of the floating point." );
       return exponent_bias<T>() * 2;
     }

◆ negative_infinity()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::negative_infinity ( )

inlinestaticconstexpr

Definition at line 479 of file FPHelpers.h.

     {
       return sign_mask<T>() | exponent_mask<T>();
     }

◆ negative_zero()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::negative_zero ( )

inlinestaticconstexpr

Definition at line 467 of file FPHelpers.h.

     {
       return sign_mask<T>();
     }

◆ positive_infinity()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::positive_infinity ( )

inlinestaticconstexpr

Definition at line 473 of file FPHelpers.h.

     {
       return exponent_mask<T>();
     }

◆ positive_zero()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::positive_zero ( )

inlinestaticconstexpr

Definition at line 461 of file FPHelpers.h.

     {
       return T(0);
     }

◆ round_results()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static bool FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::round_results	(	const bool	is_negative,
		const bool	is_odd,
		const bool	is_nearer_to_up,
		const bool	is_tied,
		RoundingModes	rt
	)

inlinestaticconstexpr

Definition at line 485 of file FPHelpers.h.

     {
       switch (rt)
         {
           case RoundingModes::ToPlusInfinity:
             return !is_negative;
           case RoundingModes::ToMinusInfinity:
             return is_negative;
           case RoundingModes::ToZero:
             return 0;
           //Truncate => do nothing
           case RoundingModes::ToNearestEven:
             return is_nearer_to_up || (is_odd && is_tied);
           case RoundingModes::ToNearestAwayFromZero:
             return is_nearer_to_up || is_tied;
           default:
             return 0;
         }
     }

◆ sign_mask()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::sign_mask ( )

inlinestaticconstexpr

Definition at line 390 of file FPHelpers.h.

     {
       static_assert(sizeof(T) * CHAR_BIT >= (mantiss + exp + 1),
                     "The type must be large enough to hold the bit representation of the floating point." );
       T ret = T(1) << (exp + mantiss);
       return ret;
     }

◆ subtract()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::subtract	(	const T	a,
		const T	b,
		const RoundingModes	rt = `RoundingModes::Default`
	)

inlinestaticconstexpr

Definition at line 699 of file FPHelpers.h.

     {
       return add<T>(a, b ^ sign_mask<T>(), rt);
     }

◆ subtract_patterns()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::subtract_patterns	(	const T	a,
		const T	b,
		const RoundingModes	rt = `RoundingModes::Default`
	)

inlinestaticconstexpr

The absolute value of a must be greater than or equal to that of b.

We also don't handle zero, NaN or infinities here.

Definition at line 571 of file FPHelpers.h.

     {
       //Subtracts the magnitude encoded in b from the magnitude encoded in a
       //and returns the resulting bit pattern, which carries the sign bit of
       //a. The caller is expected to pass the operand with the larger (or
       //equal) magnitude as a; zero, NaN and infinite operands are not
       //handled here.
       using namespace OperatorsHelper;
  
       //Number of low-order guard bits the mantissas are shifted left by,
       //so that rounding information survives the alignment shift.
       constexpr unsigned int extra_bits = 2;
       //One sign and at least one exponent bit, we're safe!
  
       //The bit immediately above the mantissa field (the implicit leading one).
       constexpr T first_not_mantissa_bit = T(1) << mantissa_size_bits();
  
       //Biased exponent fields of both operands.
       const T exp_a = (a & exponent_mask<T>()) >> mantissa_size_bits();
       const T exp_b = (b & exponent_mask<T>()) >> mantissa_size_bits();
  
       //True when the exponent difference is small enough for b to overlap
       //the extended mantissa of a; otherwise b is ignored entirely.
       const bool use_second = (exp_a - exp_b) <= mantissa_size_bits() + 1 + extra_bits;
       const bool is_negative = a & sign_mask<T>();
  
       //Mantissas widened by the guard bits; the implicit leading one is
       //only inserted when the exponent field is non-zero.
       const T mantiss_a = ((a & mantissa_mask<T>()) | (first_not_mantissa_bit * (exp_a != 0))) << extra_bits;
       const T mantiss_b = ((b & mantissa_mask<T>()) | (first_not_mantissa_bit * (exp_b != 0))) << extra_bits;
       //To account for the overflow and rounding.
  
       T mantiss_ret = mantiss_a;
  
       //Align b to the exponent of a and subtract it.
       //Presumably safe_rshift tolerates shift counts >= the width of T - confirm.
       mantiss_ret -= safe_rshift(mantiss_b, exp_a - exp_b) * use_second;
  
       //ORs a single 0/1 into the lowest bit; presumably meant as a "sticky"
       //marker for bits lost in the alignment shift - TODO confirm.
       mantiss_ret |= !!(safe_lshift(-mantiss_b, exp_a - exp_b) & mantissa_mask<T>()) * use_second;
  
       //More leading zeros than desired means cancellation happened and the
       //result must be shifted left by shift_amount to renormalize.
       //Presumably clamped_sub saturates at zero - confirm.
       const unsigned int leading_zeros = LeadingZerosPortability::count_leading_zeros<T>(mantiss_ret);
       constexpr unsigned int desired_number_of_zeros = sizeof(T) * CHAR_BIT - mantissa_size_bits() - 1 - extra_bits;
       const unsigned int shift_amount = clamped_sub(leading_zeros, desired_number_of_zeros);
  
       //last_bit_mask: lowest bit that survives dropping the guard bits.
       //last_discarded_bit_mask: highest guard bit (the "half" position).
       //round_mask: all the guard bits.
       const T last_bit_mask = T(1) << extra_bits;
       const T last_discarded_bit_mask = last_bit_mask >> 1;
       const T round_mask = (last_bit_mask - 1) * !!(last_bit_mask);
       //Discarded part strictly above / exactly at one half of the last kept bit.
       const bool round_up = (mantiss_ret & round_mask) > last_discarded_bit_mask;
       const bool tied = last_discarded_bit_mask && ((mantiss_ret & round_mask) == last_discarded_bit_mask);
  
       bool round_bit = round_results<T>(is_negative, (mantiss_ret & last_bit_mask), round_up, tied, rt) && !!last_bit_mask;
  
       //Drop the guard bits and round; note that this happens before the
       //renormalizing left shift below.
       mantiss_ret >>= extra_bits;
  
       mantiss_ret += round_bit;
  
       mantiss_ret = safe_lshift(mantiss_ret, shift_amount);
  
       //The exponent shrinks by the normalization shift, but not below zero.
       const T exponent_ret = clamped_sub(exp_a, shift_amount);
  
       //If the shift exceeded the available exponent, undo the excess so the
       //mantissa matches the clamped exponent.
       mantiss_ret = safe_rshift(mantiss_ret, clamped_sub(shift_amount, exp_a));
  
       //Strip the implicit bit.
       mantiss_ret &= mantissa_mask<T>();
  
       //Reassemble sign, exponent and mantissa fields.
       return (is_negative * sign_mask<T>()) | (exponent_ret << mantissa_size_bits()) | mantiss_ret;
     }

◆ to_total_ordering()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

template<class T >

constexpr static T FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::to_total_ordering ( const T pattern )

inlinestaticconstexpr

Warning: Even though +0 and -0 should technically compare equal, we will convert between them too so the operation becomes fully reversible.

Definition at line 444 of file FPHelpers.h.

     {
       const T xor_mask = (!!(pattern & sign_mask<T>()) * full_mask<T>()) | sign_mask<T>();
       return pattern ^ xor_mask;
     }

◆ total_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>

constexpr static unsigned int FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >::total_size_bits ( )

inlinestaticconstexpr

Definition at line 355 of file FPHelpers.h.

     {
       return mantiss + exp + 1;
     }

The documentation for this class was generated from the following file:

FPHelpers.h

Static Public Member Functions

Detailed Description

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1> class FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >

Member Function Documentation

◆ absolute_value()

◆ add()

◆ add_patterns()

◆ exponent_bias()

◆ exponent_mask()

◆ exponent_size_bits()

◆ from_total_ordering()

◆ full_mask()

◆ is_infinite()

◆ is_NaN()

◆ mantissa_mask()

◆ mantissa_size_bits()

◆ max_exponent_with_bias()

◆ negative_infinity()

◆ negative_zero()

◆ positive_infinity()

◆ positive_zero()

◆ round_results()

◆ sign_mask()

◆ subtract()

◆ subtract_patterns()

◆ to_total_ordering()

◆ total_size_bits()

template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
class FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >