-
Notifications
You must be signed in to change notification settings - Fork 5k
Add BFloat16 #98643
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Add BFloat16 #98643
Changes from 10 commits
589afe0
312d051
5e1c981
1fb4765
fc05d3b
152fe99
25a16e7
50d90aa
559f2e0
b24839c
8284526
8e32e71
4bd266e
6df00e6
ff295fd
09af2b2
1a8f0ad
c9fc867
e9fc0f8
c967aa5
17c13c0
ad780a0
c01949f
b63c1df
754a3c8
bcc260f
5a3d200
c420dd3
13e65d1
8c5f546
0cb3932
b615e68
8f70d91
2458dd8
a8bb94b
9644914
a29db5c
a8a8a49
2fd392f
f1582e7
eace3a6
abd1e80
62156c9
e8012c9
f711f8d
08168ff
d59a8c5
eb6dc47
25b7684
832651e
a07fe96
f9c35d3
4059b66
4b4d1a5
6ed52f5
14b0d85
ea1dd5f
dfd49c8
f5461ac
daaec69
9938e8b
b889417
1282c85
e6dd118
922f411
86dce2d
9f729a3
6baf940
fb88f8e
d697344
4e83ad9
d58c80a
8639fa3
34b1d07
8adbeb2
1f2653f
9046622
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
namespace System.Numerics | ||
{ | ||
/// <summary> | ||
/// Represents a shortened (16-bit) version of 32 bit floating-point value (<see cref="float"/>). | ||
/// </summary> | ||
public readonly struct BFloat16 | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
: IComparable, | ||
IComparable<BFloat16>, | ||
IEquatable<BFloat16> | ||
{ | ||
// Raw bit pattern with only the lowest mantissa bit set. | ||
private const ushort EpsilonBits = 0x0001; | ||
|
||
// Raw bit patterns of the extreme finite values: 0x7F7F is the last pattern below 0x7F80 | ||
// (which widens to float infinity); 0xFF7F is the same pattern with the sign bit (0x8000) set. | ||
private const ushort MinValueBits = 0xFF7F; | ||
private const ushort MaxValueBits = 0x7F7F; | ||
|
||
/// <summary>
/// Gets the smallest <see cref="BFloat16"/> value that is strictly greater than zero.
/// </summary>
public static BFloat16 Epsilon
{
    get { return new BFloat16(EpsilonBits); }
}
|
||
/// <summary>
/// Gets the most negative finite <see cref="BFloat16"/> value.
/// </summary>
public static BFloat16 MinValue
{
    get { return new BFloat16(MinValueBits); }
}
|
||
/// <summary>
/// Gets the largest finite <see cref="BFloat16"/> value.
/// </summary>
public static BFloat16 MaxValue
{
    get { return new BFloat16(MaxValueBits); }
}
|
||
// The raw 16 bits of the value; widening to float places them in the upper half of the 32-bit pattern.
internal readonly ushort _value;

/// <summary>Creates a <see cref="BFloat16"/> directly from its raw bit pattern.</summary>
/// <param name="value">The raw 16-bit pattern to store, taken as-is without any numeric conversion.</param>
internal BFloat16(ushort value)
{
    _value = value;
}
|
||
// Casting | ||
|
||
/// <summary>Explicitly converts a <see cref="float" /> value to its nearest representable <see cref="BFloat16"/> value.</summary>
/// <param name="value">The value to convert.</param>
/// <returns>
/// <paramref name="value" /> converted to its nearest representable <see cref="BFloat16"/> value.
/// Finite inputs are rounded to nearest with ties going to the even result, infinities keep their
/// bit pattern, and a NaN input always produces a NaN result.
/// </returns>
public static explicit operator BFloat16(float value)
{
    uint bits = BitConverter.SingleToUInt32Bits(value);
    uint upper = bits >> 16;

    if (float.IsFinite(value))
    {
        // Only finite numbers are rounded.
        uint lower = bits & 0xFFFF;
        uint sign = upper & 0x8000;

        // Strip sign from upper so the increment below cannot carry into it.
        upper &= 0x7FFF;

        // Determine the increment for rounding.
        // When upper is even, the midpoint (0x8000) must tie to "no increment"; this is achieved
        // by decrementing lower by one whenever upper is even and lower >= 0x8000.
        uint lowerShift = (~upper) & (lower >> 15) & 1;
        lower -= lowerShift;
        uint increment = lower >> 15;

        // Do the increment; MaxValue is correctly carried over into Infinity.
        upper += increment;

        // Put the sign back.
        upper |= sign;
    }
    else if (float.IsNaN(value))
    {
        // A NaN whose payload lives only in the discarded low 16 bits (e.g. 0x7F800001) would
        // otherwise truncate to the infinity pattern (0x7F80 / 0xFF80). Setting the top mantissa
        // bit guarantees the result is still a NaN.
        upper |= 0x0040;
    }

    return new BFloat16((ushort)upper);
}
|
||
/// <summary>Explicitly converts a <see cref="double" /> value to its nearest representable <see cref="BFloat16"/> value.</summary>
/// <param name="value">The value to convert.</param>
/// <returns><paramref name="value" /> converted to its nearest representable <see cref="BFloat16"/> value.</returns>
public static explicit operator BFloat16(double value)
{
    float single = (float)value;
    uint bits = BitConverter.SingleToUInt32Bits(single);

    // Going through float rounds twice. The two roundings can only disagree with a single direct
    // rounding when the intermediate float lands exactly on a BFloat16 midpoint (low half == 0x8000)
    // although the original double was not on it: the tie-to-even rule would then ignore which side
    // the real value was on.
    if ((bits & 0xFFFF) == 0x8000 && float.IsFinite(single))
    {
        double roundTripped = single;
        if (roundTripped != value)
        {
            // Float bit patterns are ordered by magnitude, so +/-1 moves one step away from / toward
            // zero. The low half becomes 0x8001 or 0x7FFF, so the sign and upper bits are untouched.
            bits = Math.Abs(value) > Math.Abs(roundTripped) ? bits + 1 : bits - 1;
            single = BitConverter.UInt32BitsToSingle(bits);
        }
    }

    return (BFloat16)single;
}
|
||
/// <summary>Explicitly converts a <see cref="BFloat16" /> value to a <see cref="float"/> value.</summary>
/// <param name="value">The value to convert.</param>
/// <returns>
/// The <see cref="float"/> whose upper 16 bits are the bits of <paramref name="value" /> and whose
/// lower 16 bits are zero.
/// </returns>
public static explicit operator float(BFloat16 value)
{
    int widenedBits = value._value << 16;
    return BitConverter.Int32BitsToSingle(widenedBits);
}
|
||
/// <summary>Explicitly converts a <see cref="BFloat16" /> value to a <see cref="double"/> value.</summary>
/// <param name="value">The value to convert.</param>
/// <returns><paramref name="value" /> widened first to <see cref="float"/> and then to <see cref="double"/>.</returns>
public static explicit operator double(BFloat16 value)
{
    float single = (float)value;
    return (double)single;
}
|
||
// BFloat16 is effectively a truncated Single: the lower 16 bits of the mantissa are dropped. | ||
// Delegating all operations to Single should therefore be both correct and efficient. | ||
|
||
// Comparison | ||
|
||
/// <summary>
/// Compares this instance to an arbitrary object and reports their relative order.
/// </summary>
/// <param name="obj">The object to compare with, or <see langword="null"/>.</param>
/// <returns>
/// 1 if <paramref name="obj"/> is <see langword="null"/>; otherwise the result of
/// <see cref="CompareTo(BFloat16)"/> for the unboxed value: less than zero if this instance is smaller,
/// zero if equal, greater than zero if larger.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="obj"/> is neither <see langword="null"/> nor a <see cref="BFloat16"/>.</exception>
public int CompareTo(object? obj)
{
    if (obj is BFloat16 other)
    {
        return CompareTo(other);
    }

    if (obj is null)
    {
        return 1;
    }

    throw new ArgumentException(SR.Arg_MustBeBFloat16);
}
|
||
/// <summary>
/// Compares this instance to another <see cref="BFloat16"/> and reports their relative order.
/// </summary>
/// <param name="other">The value to compare with.</param>
/// <returns>
/// The result of <see cref="float.CompareTo(float)"/> applied to both values after widening them to
/// <see cref="float"/>: less than zero if this instance is smaller, zero if equal, greater than zero if larger.
/// </returns>
public int CompareTo(BFloat16 other)
{
    float self = (float)this;
    float that = (float)other;
    return self.CompareTo(that);
}
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is converting to Presumably its There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah it should be, as upcasting is simple shifting without branching. I would do a benchmark later. |
||
|
||
/// <summary>Determines whether two values are equal, comparing them as <see cref="float"/> values.</summary>
/// <param name="left">The first value to compare.</param>
/// <param name="right">The second value to compare.</param>
/// <returns><see langword="true"/> if the widened operands compare equal with <c>==</c>; otherwise <see langword="false"/>.</returns>
public static bool operator ==(BFloat16 left, BFloat16 right)
{
    float lhs = (float)left;
    float rhs = (float)right;
    return lhs == rhs;
}
|
||
/// <summary>Determines whether two values are not equal, comparing them as <see cref="float"/> values.</summary>
/// <param name="left">The first value to compare.</param>
/// <param name="right">The second value to compare.</param>
/// <returns><see langword="true"/> if the widened operands compare unequal with <c>!=</c>; otherwise <see langword="false"/>.</returns>
public static bool operator !=(BFloat16 left, BFloat16 right)
{
    float lhs = (float)left;
    float rhs = (float)right;
    return lhs != rhs;
}
|
||
/// <summary>Determines whether <paramref name="left"/> is less than <paramref name="right"/>, comparing them as <see cref="float"/> values.</summary>
/// <param name="left">The first value to compare.</param>
/// <param name="right">The second value to compare.</param>
/// <returns><see langword="true"/> if the widened <paramref name="left"/> is less than the widened <paramref name="right"/>; otherwise <see langword="false"/>.</returns>
public static bool operator <(BFloat16 left, BFloat16 right)
{
    float lhs = (float)left;
    float rhs = (float)right;
    return lhs < rhs;
}
|
||
/// <summary>Determines whether <paramref name="left"/> is greater than <paramref name="right"/>, comparing them as <see cref="float"/> values.</summary>
/// <param name="left">The first value to compare.</param>
/// <param name="right">The second value to compare.</param>
/// <returns><see langword="true"/> if the widened <paramref name="left"/> is greater than the widened <paramref name="right"/>; otherwise <see langword="false"/>.</returns>
public static bool operator >(BFloat16 left, BFloat16 right)
{
    float lhs = (float)left;
    float rhs = (float)right;
    return lhs > rhs;
}
|
||
/// <summary>Determines whether <paramref name="left"/> is less than or equal to <paramref name="right"/>, comparing them as <see cref="float"/> values.</summary>
/// <param name="left">The first value to compare.</param>
/// <param name="right">The second value to compare.</param>
/// <returns><see langword="true"/> if the widened <paramref name="left"/> is less than or equal to the widened <paramref name="right"/>; otherwise <see langword="false"/>.</returns>
public static bool operator <=(BFloat16 left, BFloat16 right)
{
    float lhs = (float)left;
    float rhs = (float)right;
    return lhs <= rhs;
}
|
||
/// <summary>Determines whether <paramref name="left"/> is greater than or equal to <paramref name="right"/>, comparing them as <see cref="float"/> values.</summary>
/// <param name="left">The first value to compare.</param>
/// <param name="right">The second value to compare.</param>
/// <returns><see langword="true"/> if the widened <paramref name="left"/> is greater than or equal to the widened <paramref name="right"/>; otherwise <see langword="false"/>.</returns>
public static bool operator >=(BFloat16 left, BFloat16 right)
{
    float lhs = (float)left;
    float rhs = (float)right;
    return lhs >= rhs;
}
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Equality | ||
|
||
/// <summary>
/// Indicates whether this instance equals <paramref name="other"/>.
/// </summary>
/// <param name="other">The value to compare with this instance.</param>
/// <returns>The result of <see cref="float.Equals(float)"/> applied to both values after widening them to <see cref="float"/>.</returns>
public bool Equals(BFloat16 other)
{
    float self = (float)this;
    float that = (float)other;
    return self.Equals(that);
}
|
||
/// <summary>
/// Indicates whether this instance equals the given object.
/// </summary>
/// <param name="obj">The object to compare with this instance.</param>
/// <returns>
/// <see langword="true"/> if <paramref name="obj"/> is a <see cref="BFloat16"/> for which
/// <see cref="Equals(BFloat16)"/> returns <see langword="true"/>; otherwise <see langword="false"/>.
/// </returns>
public override bool Equals(object? obj)
{
    if (obj is BFloat16 other)
    {
        return Equals(other);
    }

    return false;
}
|
||
/// <summary>
/// Returns the hash code for this instance.
/// </summary>
/// <returns>The hash code of this value after widening it to <see cref="float"/>.</returns>
public override int GetHashCode()
{
    float widened = (float)this;
    return widened.GetHashCode();
}
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/// <summary>
/// Converts this value to its string representation.
/// </summary>
/// <returns>The result of <see cref="float.ToString()"/> for this value widened to <see cref="float"/>.</returns>
public override string ToString()
{
    float widened = (float)this;
    return widened.ToString();
}
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.