33 uint32_t bits = std::bit_cast<uint32_t>(val);
37 const int max_exp = (1 << (exp_bits - 1)) - 1;
38 const int min_exp = -(max_exp - 1);
39 const int exp_raw =
static_cast<int>((bits >> 23) & 0xFFu);
40 if (exp_raw != 0xFF) {
41 const int actual_exp = exp_raw - 127;
42 if (actual_exp > max_exp) {
43 const uint32_t
sign = bits & 0x80000000u;
44 const uint32_t sat_exp =
static_cast<uint32_t
>(max_exp + 127) << 23;
45 const uint32_t sat_man = (1u << 23) - 1u;
46 bits =
sign | sat_exp | sat_man;
47 }
else if (actual_exp < min_exp) {
54 const int k = 31 - exp_bits - mantissa_bits;
55 if (k > 0 && k < 32) {
56 const uint32_t mask = ~((1u << k) - 1u);
57 const uint32_t round_bias = (1u << (k - 1)) - 1u + ((bits >> k) & 1u);
58 bits = (bits + round_bias) & mask;
60 return std::bit_cast<float>(bits);