template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
class FloatingPointHelpers::IEEE754_like< mantiss, exp, tag >
Specifies a floating point format like those described in IEEE-754, with an adjustable number of bits in the exponent and mantissa.
The `tag` parameter only serves to distinguish floating point definitions that have the same size, e.g. when multiple implementations of native/faster operations are available. Tag 0 is reserved for the default (i.e. non-native) implementation.
Definition at line 350 of file FPHelpers.h.
template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T >
The absolute value of `a` must be greater than or equal to that of `b`.
We also don't handle zero, NaN or infinities here.
Definition at line 512 of file FPHelpers.h.
514 using namespace OperatorsHelper;
516 constexpr
unsigned int extra_bits = 2;
524 const bool a_denormal = (exp_a != 0);
525 const bool b_denormal = (exp_b != 0);
528 const bool is_negative =
a & sign_mask<T>();
530 const T mantiss_a = ((
a & mantissa_mask<T>()) | (first_not_mantissa_bit * a_denormal)) << extra_bits;
531 const T mantiss_b = ((
b & mantissa_mask<T>()) | (first_not_mantissa_bit * b_denormal)) << extra_bits;
534 T mantiss_ret = mantiss_a;
536 mantiss_ret += safe_rshift(mantiss_b, exp_a - exp_b);
538 mantiss_ret |= !!(safe_lshift(mantiss_b, exp_a - exp_b) & mantissa_mask<T>()) * use_second;
540 const unsigned int leading_zeros = LeadingZerosPortability::count_leading_zeros<T>(mantiss_ret);
541 constexpr
unsigned int desired_number_of_zeros =
sizeof(
T) * CHAR_BIT -
mantissa_size_bits() - 1 - extra_bits;
542 const unsigned int shift_amount = clamped_sub(desired_number_of_zeros, leading_zeros);
544 const T last_bit_mask =
T(1) << (shift_amount + extra_bits);
545 const T last_discarded_bit_mask = last_bit_mask >> 1;
546 const T round_mask = (last_bit_mask - 1) * !!(last_bit_mask);
547 const bool round_up = (mantiss_ret & round_mask) > last_discarded_bit_mask;
548 const bool tied = last_discarded_bit_mask && ((mantiss_ret & round_mask) == last_discarded_bit_mask);
550 bool round_bit = round_results<T>(is_negative, (mantiss_ret & last_bit_mask), round_up, tied, rt) && !!last_bit_mask;
552 mantiss_ret = safe_rshift(mantiss_ret, shift_amount + extra_bits);
554 mantiss_ret += round_bit * (shift_amount + extra_bits <=
sizeof(
T) * CHAR_BIT);
556 const T exponent_ret = exp_a + shift_amount + (exp_a == 0 && mantiss_ret > mantissa_mask<T>());
558 mantiss_ret &= mantissa_mask<T>();
560 mantiss_ret &= ~( ( exponent_ret > max_exponent_with_bias<T>() ) * mantissa_mask<T>() );
564 return (is_negative * sign_mask<T>()) | (exponent_ret <<
mantissa_size_bits()) | mantiss_ret;
template<unsigned int mantiss, unsigned int exp, unsigned int tag = 1>
template<class T >
The absolute value of `a` must be greater than or equal to that of `b`.
We also don't handle zero, NaN or infinities here.
Definition at line 571 of file FPHelpers.h.
573 using namespace OperatorsHelper;
575 constexpr
unsigned int extra_bits = 2;
584 const bool is_negative =
a & sign_mask<T>();
586 const T mantiss_a = ((
a & mantissa_mask<T>()) | (first_not_mantissa_bit * (exp_a != 0))) << extra_bits;
587 const T mantiss_b = ((
b & mantissa_mask<T>()) | (first_not_mantissa_bit * (exp_b != 0))) << extra_bits;
590 T mantiss_ret = mantiss_a;
592 mantiss_ret -= safe_rshift(mantiss_b, exp_a - exp_b) * use_second;
594 mantiss_ret |= !!(safe_lshift(-mantiss_b, exp_a - exp_b) & mantissa_mask<T>()) * use_second;
596 const unsigned int leading_zeros = LeadingZerosPortability::count_leading_zeros<T>(mantiss_ret);
597 constexpr
unsigned int desired_number_of_zeros =
sizeof(
T) * CHAR_BIT -
mantissa_size_bits() - 1 - extra_bits;
598 const unsigned int shift_amount = clamped_sub(leading_zeros, desired_number_of_zeros);
600 const T last_bit_mask =
T(1) << extra_bits;
601 const T last_discarded_bit_mask = last_bit_mask >> 1;
602 const T round_mask = (last_bit_mask - 1) * !!(last_bit_mask);
603 const bool round_up = (mantiss_ret & round_mask) > last_discarded_bit_mask;
604 const bool tied = last_discarded_bit_mask && ((mantiss_ret & round_mask) == last_discarded_bit_mask);
606 bool round_bit = round_results<T>(is_negative, (mantiss_ret & last_bit_mask), round_up, tied, rt) && !!last_bit_mask;
608 mantiss_ret >>= extra_bits;
610 mantiss_ret += round_bit;
612 mantiss_ret = safe_lshift(mantiss_ret, shift_amount);
614 const T exponent_ret = clamped_sub(exp_a, shift_amount);
616 mantiss_ret = safe_rshift(mantiss_ret, clamped_sub(shift_amount, exp_a));
618 mantiss_ret &= mantissa_mask<T>();
620 return (is_negative * sign_mask<T>()) | (exponent_ret <<
mantissa_size_bits()) | mantiss_ret;