-
Notifications
You must be signed in to change notification settings - Fork 119
implement conversion between half and single precision for denormals #1927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 5 commits
709d320
39e76b1
e43238b
05f7e86
3af5540
fbc7b3b
312ef7e
fc1ad57
1bcfb05
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -416,7 +416,41 @@ class alignas(std::uint16_t) half { | |||||||||||||
| return conv::shift_sign(data_) | exp; | ||||||||||||||
| } else if (f16_traits::is_denom(exp)) { | ||||||||||||||
| // TODO: handle denormals | ||||||||||||||
| return conv::shift_sign(data_); | ||||||||||||||
|
|
||||||||||||||
| // This can not be negative if f16_traits::is_denom(exp) is true | ||||||||||||||
| const auto tail_length = | ||||||||||||||
|
yhmtsai marked this conversation as resolved.
Outdated
|
||||||||||||||
| ((f32_traits::bias_mask - | ||||||||||||||
| (data_ & f32_traits::exponent_mask)) >> | ||||||||||||||
| f32_traits::significand_bits) - | ||||||||||||||
| 1; | ||||||||||||||
| if (tail_length > f32_traits::significand_bits + 1) { | ||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
please feel free to rephrase the sentence |
||||||||||||||
| return conv::shift_sign(data_); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
|
emre-safa marked this conversation as resolved.
|
||||||||||||||
| // It would be better if defined with const? | ||||||||||||||
| auto tail = | ||||||||||||||
| (data_ & f32_traits::significand_mask) & | ||||||||||||||
| static_cast<f32_traits::bits_type>((1 << tail_length) - 1); | ||||||||||||||
|
|
||||||||||||||
| // Handle if the tail_length is 24. It means half precision will | ||||||||||||||
| // be the smallest possible number it can represent(or zero) | ||||||||||||||
| if (tail_length == f32_traits::significand_bits + 1) { | ||||||||||||||
| tail |= 1 << f32_traits::significand_bits; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| auto new_significand = | ||||||||||||||
| ((data_ & f32_traits::significand_mask) >> tail_length) | | ||||||||||||||
| (1 << (f32_traits::significand_bits - tail_length)); | ||||||||||||||
|
|
||||||||||||||
| const auto result = | ||||||||||||||
| conv::shift_sign(data_) | exp | new_significand; | ||||||||||||||
|
|
||||||||||||||
| // It would be better if defined with constexpr | ||||||||||||||
| const auto half = | ||||||||||||||
| static_cast<f32_traits::bits_type>(1 << (tail_length - 1)); | ||||||||||||||
|
emre-safa marked this conversation as resolved.
Outdated
|
||||||||||||||
|
|
||||||||||||||
| return result + | ||||||||||||||
| (tail > half || ((tail == half) && (result & 1))); | ||||||||||||||
| } else { | ||||||||||||||
| // Rounding to even | ||||||||||||||
| const auto result = conv::shift_sign(data_) | exp | | ||||||||||||||
|
|
@@ -442,8 +476,42 @@ class alignas(std::uint16_t) half { | |||||||||||||
| return conv::shift_sign(data_) | f32_traits::exponent_mask | | ||||||||||||||
| f32_traits::significand_mask; | ||||||||||||||
| } else if (f16_traits::is_denom(data_)) { | ||||||||||||||
| // TODO: handle denormals | ||||||||||||||
| return conv::shift_sign(data_); | ||||||||||||||
| if (!(data_ & f16_traits::significand_mask)) { | ||||||||||||||
| return conv::shift_sign(data_); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| int leading_zeros{}; | ||||||||||||||
|
|
||||||||||||||
| // Counts leading zeros in the significand to determine the | ||||||||||||||
| // normalization shift | ||||||||||||||
| #if defined(_MSC_VER) | ||||||||||||||
| unsigned long index{}; | ||||||||||||||
| _BitScanReverse(&index, static_cast<std::uint32_t>( | ||||||||||||||
| f16_traits::significand_mask & data_)); | ||||||||||||||
|
|
||||||||||||||
| leading_zeros = f16_traits::significand_bits - index - 1; | ||||||||||||||
| #else | ||||||||||||||
| leading_zeros = | ||||||||||||||
| __builtin_clz(static_cast<std::uint32_t>( | ||||||||||||||
| f16_traits::significand_mask & data_)) - | ||||||||||||||
| f16_traits::exponent_bits - f16_traits::sign_bits - | ||||||||||||||
| 8 * (sizeof(conv::result_bits) - sizeof(conv::source_bits)); | ||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
This is a predefined macro from climits |
||||||||||||||
| #endif | ||||||||||||||
|
|
||||||||||||||
| // Computes the new exponent, 0xxxxxxxx000...00 | ||||||||||||||
| auto new_exponent = | ||||||||||||||
| ((conv::bias_change >> f32_traits::significand_bits) - | ||||||||||||||
| leading_zeros) | ||||||||||||||
| << f32_traits::significand_bits; | ||||||||||||||
|
Comment on lines
+503
to
+506
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
It is also worth noting that leading_zeros is always less than the bias change because we are converting half -> float |
||||||||||||||
|
|
||||||||||||||
| // Shifts the original significand to normalize it, remove the | ||||||||||||||
| // implicit '1', and align it in the new 23-bit field | ||||||||||||||
| auto new_significand = | ||||||||||||||
| (static_cast<f32_traits::bits_type>(data_) | ||||||||||||||
| << (conv::significand_offset + leading_zeros + 1)) & | ||||||||||||||
| f32_traits::significand_mask; | ||||||||||||||
|
|
||||||||||||||
| return conv::shift_sign(data_) | new_exponent | new_significand; | ||||||||||||||
| } else { | ||||||||||||||
| return conv::shift_sign(data_) | conv::shift_exponent(data_) | | ||||||||||||||
| conv::shift_significand(data_); | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add another test in which the value rounds down to an even number, by adding one last bit at the end, to ensure we compare the full tail before the shift.