Skip to content
96 changes: 96 additions & 0 deletions core/test/base/half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,54 @@ TEST(FloatToHalf, TruncatesLargeNumberRoundToEven)
EXPECT_EQ(get_bits(neg_x2), get_bits("1" "11110" "1001001111"));
}

// The float input has biased exponent 107 (2^-20), which is below the smallest
// normal half (2^-14), so the result must be a half subnormal. In units of
// 2^-24 the input is 10001.1011...b (about 17.7), which rounds to 18.
TEST(FloatToHalf, ConvertsRandomPositiveNumber)
{
    const half converted =
        create_from_bits<float>("0" "01101011" "00011011000001011110010");
    const auto expected_bits = get_bits("0" "00000" "0000010010");

    ASSERT_EQ(get_bits(converted), expected_bits);
}

TEST(FloatToHalf, RoundsUpToEvenNumber)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add another test, derived from the rounds-down-to-even case, that sets one extra bit at the very end of the significand, to ensure we compare the full tail before the shift.

{
half x = create_from_bits<float>("0" "01101001" "11100000000000000000000");

ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000001000"));
}

// Negative float with biased exponent 108 (2^-19). In units of 2^-24 its
// magnitude is 111010.1b = 58.5, an exact tie, so round-to-even must pick the
// even neighbour 58 (111010b) while keeping the sign bit.
TEST(FloatToHalf, RoundsDownToEvenNumber)
{
    const half converted =
        create_from_bits<float>("1" "01101100" "11010100000000000000000");
    const auto expected_bits = get_bits("1" "00000" "0000111010");

    ASSERT_EQ(get_bits(converted), expected_bits);
}

// The input is exactly 2^-25 (biased exponent 102, zero significand), i.e.
// half of the smallest half subnormal 2^-24. This tie rounds to the even
// neighbour, which is positive zero.
TEST(FloatToHalf, LargestNumberThatConvertsToZero)
{
    const half converted =
        create_from_bits<float>("0" "01100110" "00000000000000000000000");
    const auto positive_zero = get_bits("0" "00000" "0000000000");

    ASSERT_EQ(get_bits(converted), positive_zero);
}

TEST(FloatToHalf, SmallestNumberThatDoesntConvertToZero)
{
half x = create_from_bits<float>("0" "01100110" "00000000000000000000001");
Comment thread
emre-safa marked this conversation as resolved.

ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000000001"));
}

// A positive float with biased exponent 77 (about 2^-50) is far below half of
// the smallest half subnormal, so it must flush to positive zero.
TEST(FloatToHalf, RandomNumberThatConvertsToPositiveZero)
{
    const half converted =
        create_from_bits<float>("0" "01001101" "10010100101111001101010");
    const auto positive_zero = get_bits("0" "00000" "0000000000");

    ASSERT_EQ(get_bits(converted), positive_zero);
}

// A negative float with biased exponent 72 (about 2^-55) is far below half of
// the smallest half subnormal; the magnitude vanishes but the sign bit must
// survive, giving negative zero.
TEST(FloatToHalf, RandomNumberThatConvertsToNegativeZero)
{
    const half converted =
        create_from_bits<float>("1" "01001000" "10000010110100001010001");
    const auto negative_zero = get_bits("1" "00000" "0000000000");

    ASSERT_EQ(get_bits(converted), negative_zero);
}
Comment thread
emre-safa marked this conversation as resolved.

TEST(HalfToFloat, ConvertsOne)
{
Expand Down Expand Up @@ -195,4 +243,52 @@ TEST(HalfToFloat, ExtendsLargeNumber)
}


// Half subnormal 1110101100b * 2^-24 has no leading zeros in its significand,
// so it normalizes to 1.110101100b * 2^-15: float biased exponent 112 and the
// remaining nine bits left-aligned in the 23-bit significand.
TEST(HalfToFloat, ConvertsPositiveRandomDenormal)
{
    const float widened = create_from_bits<half>("0" "00000" "1110101100");
    const auto expected_bits =
        get_bits("0" "01110000" "11010110000000000000000");

    ASSERT_EQ(get_bits(widened), expected_bits);
}


// Negative half subnormal 0010111101b * 2^-24 has two leading zeros in its
// significand, so it normalizes to -1.0111101b * 2^-17: float biased exponent
// 110 and the remaining seven bits left-aligned in the 23-bit significand.
TEST(HalfToFloat, ConvertsNegativeRandomDenormal)
{
    const float widened = create_from_bits<half>("1" "00000" "0010111101");
    const auto expected_bits =
        get_bits("1" "01101110" "01111010000000000000000");

    ASSERT_EQ(get_bits(widened), expected_bits);
}


TEST(HalfToFloat, ConvertsSmallestPositiveDenormal)
Comment thread
yhmtsai marked this conversation as resolved.
{
float x = create_from_bits<half>("0" "00000" "0000000001");

ASSERT_EQ(get_bits(x), get_bits("0" "01100111" "00000000000000000000000"));
}


// The most negative half subnormal is -1023 * 2^-24 = -1.111111111b * 2^-15,
// which maps to float biased exponent 112 with nine leading ones in the
// significand.
TEST(HalfToFloat, ConvertsSmallestNegativeDenormal)
{
    const float widened = create_from_bits<half>("1" "00000" "1111111111");
    const auto expected_bits =
        get_bits("1" "01110000" "11111111100000000000000");

    ASSERT_EQ(get_bits(widened), expected_bits);
}


// The largest positive half subnormal is 1023 * 2^-24 = 1.111111111b * 2^-15,
// which maps to float biased exponent 112 with nine leading ones in the
// significand.
TEST(HalfToFloat, ConvertsLargestPositiveDenormal)
{
    const float widened = create_from_bits<half>("0" "00000" "1111111111");
    const auto expected_bits =
        get_bits("0" "01110000" "11111111100000000000000");

    ASSERT_EQ(get_bits(widened), expected_bits);
}


// The negative half subnormal closest to zero is -2^-24, which is exactly
// representable as a normal float: biased exponent 103 and a zero significand.
TEST(HalfToFloat, ConvertsLargestNegativeDenormal)
{
    const float widened = create_from_bits<half>("1" "00000" "0000000001");
    const auto expected_bits =
        get_bits("1" "01100111" "00000000000000000000000");

    ASSERT_EQ(get_bits(widened), expected_bits);
}


// clang-format on
74 changes: 71 additions & 3 deletions include/ginkgo/core/base/half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,41 @@ class alignas(std::uint16_t) half {
return conv::shift_sign(data_) | exp;
} else if (f16_traits::is_denom(exp)) {
// TODO: handle denormals
return conv::shift_sign(data_);

// This can not be negative if f16_traits::is_denom(exp) is true
const auto tail_length =
Comment thread
yhmtsai marked this conversation as resolved.
Outdated
((f32_traits::bias_mask -
(data_ & f32_traits::exponent_mask)) >>
f32_traits::significand_bits) -
1;
if (tail_length > f32_traits::significand_bits + 1) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (tail_length > f32_traits::significand_bits + 1) {
// All significand bits (including the implicit leading 1) are shifted more than one digit past the representable field, so the value is less than half of the smallest denormal and rounds to zero.
if (tail_length > f32_traits::significand_bits + 1) {

please feel free to rephrase the sentence

return conv::shift_sign(data_);
}

Comment thread
emre-safa marked this conversation as resolved.
// It would be better if defined with const?
auto tail =
(data_ & f32_traits::significand_mask) &
static_cast<f32_traits::bits_type>((1 << tail_length) - 1);

// Handle if the tail_length is 24. It means half precision will
// be the smallest possible number it can represent(or zero)
if (tail_length == f32_traits::significand_bits + 1) {
tail |= 1 << f32_traits::significand_bits;
}

auto new_significand =
((data_ & f32_traits::significand_mask) >> tail_length) |
(1 << (f32_traits::significand_bits - tail_length));

const auto result =
conv::shift_sign(data_) | exp | new_significand;

// It would be better if defined with constexpr
const auto half =
static_cast<f32_traits::bits_type>(1 << (tail_length - 1));
Comment thread
emre-safa marked this conversation as resolved.
Outdated

return result +
(tail > half || ((tail == half) && (result & 1)));
} else {
// Rounding to even
const auto result = conv::shift_sign(data_) | exp |
Expand All @@ -442,8 +476,42 @@ class alignas(std::uint16_t) half {
return conv::shift_sign(data_) | f32_traits::exponent_mask |
f32_traits::significand_mask;
} else if (f16_traits::is_denom(data_)) {
// TODO: handle denormals
return conv::shift_sign(data_);
if (!(data_ & f16_traits::significand_mask)) {
return conv::shift_sign(data_);
}

int leading_zeros{};

// Counts leading zeros in the significand to determine the
// normalization shift
#if defined(_MSC_VER)
unsigned long index{};
_BitScanReverse(&index, static_cast<std::uint32_t>(
f16_traits::significand_mask & data_));

leading_zeros = f16_traits::significand_bits - index - 1;
#else
leading_zeros =
__builtin_clz(static_cast<std::uint32_t>(
f16_traits::significand_mask & data_)) -
f16_traits::exponent_bits - f16_traits::sign_bits -
8 * (sizeof(conv::result_bits) - sizeof(conv::source_bits));

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
8 * (sizeof(conv::result_bits) - sizeof(conv::source_bits));
CHAR_BIT * (sizeof(conv::result_bits) - sizeof(conv::source_bits));

This is a predefined macro from <climits>.

#endif

// Computes the new exponent, 0xxxxxxxx000...00
auto new_exponent =
((conv::bias_change >> f32_traits::significand_bits) -
leading_zeros)
<< f32_traits::significand_bits;
Comment on lines +503 to +506

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto new_exponent =
((conv::bias_change >> f32_traits::significand_bits) -
leading_zeros)
<< f32_traits::significand_bits;
auto new_exponent =
conv::bias_change - (leading_zeros << f32_traits::significand_bits);

It is also worth noting that leading_zeros is always less than the bias change, because we are converting half -> float.


// Shifts the original significand to normalize it, remove the
// implicit '1', and align it in the new 23-bit field
auto new_significand =
(static_cast<f32_traits::bits_type>(data_)
<< (conv::significand_offset + leading_zeros + 1)) &
f32_traits::significand_mask;

return conv::shift_sign(data_) | new_exponent | new_significand;
} else {
return conv::shift_sign(data_) | conv::shift_exponent(data_) |
conv::shift_significand(data_);
Expand Down
Loading