-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[WebAssembly] Support promoting lower lanes of f16x8 to f32x4. #129786
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Brendan Dahl (brendandahl) ChangesFull diff: https://github.com/llvm/llvm-project/pull/129786.diff 6 Files Affected:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 08e39bf1a79b4..c509d7841135e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -45,6 +45,7 @@ typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef __fp16 __f16x4 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -2010,6 +2011,13 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
}
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_f32x4_promote_low_f16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector(
+ (__f16x4){((__f16x8)__a)[0], ((__f16x8)__a)[1],
+ ((__f16x8)__a)[2], ((__f16x8)__a)[3]}, __f32x4);
+}
+
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
v128_t __b,
v128_t __c) {
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..1f4809483589e 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1033,6 +1033,12 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
+// CHECK-LABEL: test_f32x4_promote_low_f16x8:
+// CHECK: f32x4.promote_low_f16x8{{$}}
+v128_t test_f32x4_promote_low_f16x8(v128_t a) {
+ return wasm_f32x4_promote_low_f16x8(a);
+}
+
// CHECK-LABEL: test_i8x16_shuffle:
// CHECK: i8x16.shuffle 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
// 0{{$}}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b24a45c2d8898..4a034ed508cfe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2341,7 +2341,7 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- if (Op.getValueType() != MVT::v2f64)
+ if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
return SDValue();
auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
@@ -2354,6 +2354,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
Opcode = WebAssemblyISD::CONVERT_LOW_U;
break;
case ISD::FP_EXTEND:
+ case ISD::FP16_TO_FP:
Opcode = WebAssemblyISD::PROMOTE_LOW;
break;
default:
@@ -2372,36 +2373,52 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
return true;
};
- unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
- SDValue LHSSrcVec, RHSSrcVec;
- if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
- !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
+ unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
+ unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
+ SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
+
+ if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec, FirstIndex) ||
+ !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec, SecondIndex))
+ return SDValue();
+
+ // If we're converting to v4f32, check the third and fourth lanes, too.
+ if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode, ThirdSrcVec, ThirdIndex) ||
+ !GetConvertedLane(Op.getOperand(3), FourthOpcode, FourthSrcVec, FourthIndex)))
+ return SDValue();
+
+ if (FirstOpcode != SecondOpcode)
return SDValue();
- if (LHSOpcode != RHSOpcode)
+ // TODO Add an optimization similar to the v2f64 below for shuffling the
+ // vectors when the lanes are in the wrong order or come from different src
+ // vectors.
+ if (NumLanes == 4 && (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
+ FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec || FirstSrcVec != FourthSrcVec ||
+ FirstIndex != 0 || SecondIndex != 1 || ThirdIndex != 2 || FourthIndex != 3))
return SDValue();
MVT ExpectedSrcVT;
- switch (LHSOpcode) {
+ switch (FirstOpcode) {
case WebAssemblyISD::CONVERT_LOW_S:
case WebAssemblyISD::CONVERT_LOW_U:
ExpectedSrcVT = MVT::v4i32;
break;
case WebAssemblyISD::PROMOTE_LOW:
- ExpectedSrcVT = MVT::v4f32;
+ ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
break;
}
- if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ if (FirstSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- auto Src = LHSSrcVec;
- if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ auto Src = FirstSrcVec;
+ if (NumLanes == 2 && (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
// Shuffle the source vector so that the converted lanes are the low lanes.
Src = DAG.getVectorShuffle(
- ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
- {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
+ {static_cast<int>(FirstIndex), static_cast<int>(SecondIndex) + 4, -1, -1});
}
- return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+ return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index c591e5ef181a4..d2d62b8b62c3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1468,6 +1468,7 @@ defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+defm "" : HalfPrecisionConvert<F32x4, I16x8, promote_low, "promote_low_f16x8", 0x14b>;
// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 4e8ff5955c63b..f0e23ea289265 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -369,3 +369,23 @@ define <8 x half> @shuffle_poison_v8f16(<8 x half> %x, <8 x half> %y) {
i32 poison, i32 poison, i32 poison, i32 poison>
ret <8 x half> %res
}
+
+define <4 x float> @promote_low_v4f32(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32:
+; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fpext <4 x half> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+define <4 x float> @promote_low_v4f32_2(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32_2:
+; CHECK: .functype promote_low_v4f32_2 (v128) -> (v128)
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = fpext <8 x half> %x to <8 x float>
+ %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4bc52a0c..57af1daad0226 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -935,4 +935,7 @@ main:
# CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02]
f16x8.convert_i16x8_u
+ # CHECK: f32x4.promote_low_f16x8 # encoding: [0xfd,0xcb,0x02]
+ f32x4.promote_low_f16x8
+
end_function
|
@llvm/pr-subscribers-backend-webassembly Author: Brendan Dahl (brendandahl) ChangesFull diff: https://github.com/llvm/llvm-project/pull/129786.diff 6 Files Affected:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 08e39bf1a79b4..c509d7841135e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -45,6 +45,7 @@ typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef __fp16 __f16x4 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -2010,6 +2011,13 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
}
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_f32x4_promote_low_f16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector(
+ (__f16x4){((__f16x8)__a)[0], ((__f16x8)__a)[1],
+ ((__f16x8)__a)[2], ((__f16x8)__a)[3]}, __f32x4);
+}
+
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
v128_t __b,
v128_t __c) {
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..1f4809483589e 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1033,6 +1033,12 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
+// CHECK-LABEL: test_f32x4_promote_low_f16x8:
+// CHECK: f32x4.promote_low_f16x8{{$}}
+v128_t test_f32x4_promote_low_f16x8(v128_t a) {
+ return wasm_f32x4_promote_low_f16x8(a);
+}
+
// CHECK-LABEL: test_i8x16_shuffle:
// CHECK: i8x16.shuffle 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
// 0{{$}}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b24a45c2d8898..4a034ed508cfe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2341,7 +2341,7 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- if (Op.getValueType() != MVT::v2f64)
+ if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
return SDValue();
auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
@@ -2354,6 +2354,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
Opcode = WebAssemblyISD::CONVERT_LOW_U;
break;
case ISD::FP_EXTEND:
+ case ISD::FP16_TO_FP:
Opcode = WebAssemblyISD::PROMOTE_LOW;
break;
default:
@@ -2372,36 +2373,52 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
return true;
};
- unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
- SDValue LHSSrcVec, RHSSrcVec;
- if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
- !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
+ unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
+ unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
+ SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
+
+ if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec, FirstIndex) ||
+ !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec, SecondIndex))
+ return SDValue();
+
+ // If we're converting to v4f32, check the third and fourth lanes, too.
+ if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode, ThirdSrcVec, ThirdIndex) ||
+ !GetConvertedLane(Op.getOperand(3), FourthOpcode, FourthSrcVec, FourthIndex)))
+ return SDValue();
+
+ if (FirstOpcode != SecondOpcode)
return SDValue();
- if (LHSOpcode != RHSOpcode)
+ // TODO Add an optimization similar to the v2f64 below for shuffling the
+ // vectors when the lanes are in the wrong order or come from different src
+ // vectors.
+ if (NumLanes == 4 && (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
+ FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec || FirstSrcVec != FourthSrcVec ||
+ FirstIndex != 0 || SecondIndex != 1 || ThirdIndex != 2 || FourthIndex != 3))
return SDValue();
MVT ExpectedSrcVT;
- switch (LHSOpcode) {
+ switch (FirstOpcode) {
case WebAssemblyISD::CONVERT_LOW_S:
case WebAssemblyISD::CONVERT_LOW_U:
ExpectedSrcVT = MVT::v4i32;
break;
case WebAssemblyISD::PROMOTE_LOW:
- ExpectedSrcVT = MVT::v4f32;
+ ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
break;
}
- if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ if (FirstSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- auto Src = LHSSrcVec;
- if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ auto Src = FirstSrcVec;
+ if (NumLanes == 2 && (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
// Shuffle the source vector so that the converted lanes are the low lanes.
Src = DAG.getVectorShuffle(
- ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
- {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
+ {static_cast<int>(FirstIndex), static_cast<int>(SecondIndex) + 4, -1, -1});
}
- return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+ return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index c591e5ef181a4..d2d62b8b62c3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1468,6 +1468,7 @@ defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+defm "" : HalfPrecisionConvert<F32x4, I16x8, promote_low, "promote_low_f16x8", 0x14b>;
// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 4e8ff5955c63b..f0e23ea289265 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -369,3 +369,23 @@ define <8 x half> @shuffle_poison_v8f16(<8 x half> %x, <8 x half> %y) {
i32 poison, i32 poison, i32 poison, i32 poison>
ret <8 x half> %res
}
+
+define <4 x float> @promote_low_v4f32(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32:
+; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fpext <4 x half> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+define <4 x float> @promote_low_v4f32_2(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32_2:
+; CHECK: .functype promote_low_v4f32_2 (v128) -> (v128)
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = fpext <8 x half> %x to <8 x float>
+ %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4bc52a0c..57af1daad0226 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -935,4 +935,7 @@ main:
# CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02]
f16x8.convert_i16x8_u
+ # CHECK: f32x4.promote_low_f16x8 # encoding: [0xfd,0xcb,0x02]
+ f32x4.promote_low_f16x8
+
end_function
|
@llvm/pr-subscribers-mc Author: Brendan Dahl (brendandahl) ChangesFull diff: https://github.com/llvm/llvm-project/pull/129786.diff 6 Files Affected:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 08e39bf1a79b4..c509d7841135e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -45,6 +45,7 @@ typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef __fp16 __f16x4 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -2010,6 +2011,13 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
}
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_f32x4_promote_low_f16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector(
+ (__f16x4){((__f16x8)__a)[0], ((__f16x8)__a)[1],
+ ((__f16x8)__a)[2], ((__f16x8)__a)[3]}, __f32x4);
+}
+
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
v128_t __b,
v128_t __c) {
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..1f4809483589e 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1033,6 +1033,12 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
+// CHECK-LABEL: test_f32x4_promote_low_f16x8:
+// CHECK: f32x4.promote_low_f16x8{{$}}
+v128_t test_f32x4_promote_low_f16x8(v128_t a) {
+ return wasm_f32x4_promote_low_f16x8(a);
+}
+
// CHECK-LABEL: test_i8x16_shuffle:
// CHECK: i8x16.shuffle 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
// 0{{$}}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b24a45c2d8898..4a034ed508cfe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2341,7 +2341,7 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- if (Op.getValueType() != MVT::v2f64)
+ if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
return SDValue();
auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
@@ -2354,6 +2354,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
Opcode = WebAssemblyISD::CONVERT_LOW_U;
break;
case ISD::FP_EXTEND:
+ case ISD::FP16_TO_FP:
Opcode = WebAssemblyISD::PROMOTE_LOW;
break;
default:
@@ -2372,36 +2373,52 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
return true;
};
- unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
- SDValue LHSSrcVec, RHSSrcVec;
- if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
- !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
+ unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
+ unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
+ SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
+
+ if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec, FirstIndex) ||
+ !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec, SecondIndex))
+ return SDValue();
+
+ // If we're converting to v4f32, check the third and fourth lanes, too.
+ if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode, ThirdSrcVec, ThirdIndex) ||
+ !GetConvertedLane(Op.getOperand(3), FourthOpcode, FourthSrcVec, FourthIndex)))
+ return SDValue();
+
+ if (FirstOpcode != SecondOpcode)
return SDValue();
- if (LHSOpcode != RHSOpcode)
+ // TODO Add an optimization similar to the v2f64 below for shuffling the
+ // vectors when the lanes are in the wrong order or come from different src
+ // vectors.
+ if (NumLanes == 4 && (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
+ FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec || FirstSrcVec != FourthSrcVec ||
+ FirstIndex != 0 || SecondIndex != 1 || ThirdIndex != 2 || FourthIndex != 3))
return SDValue();
MVT ExpectedSrcVT;
- switch (LHSOpcode) {
+ switch (FirstOpcode) {
case WebAssemblyISD::CONVERT_LOW_S:
case WebAssemblyISD::CONVERT_LOW_U:
ExpectedSrcVT = MVT::v4i32;
break;
case WebAssemblyISD::PROMOTE_LOW:
- ExpectedSrcVT = MVT::v4f32;
+ ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
break;
}
- if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ if (FirstSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- auto Src = LHSSrcVec;
- if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ auto Src = FirstSrcVec;
+ if (NumLanes == 2 && (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
// Shuffle the source vector so that the converted lanes are the low lanes.
Src = DAG.getVectorShuffle(
- ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
- {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
+ {static_cast<int>(FirstIndex), static_cast<int>(SecondIndex) + 4, -1, -1});
}
- return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+ return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index c591e5ef181a4..d2d62b8b62c3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1468,6 +1468,7 @@ defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+defm "" : HalfPrecisionConvert<F32x4, I16x8, promote_low, "promote_low_f16x8", 0x14b>;
// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 4e8ff5955c63b..f0e23ea289265 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -369,3 +369,23 @@ define <8 x half> @shuffle_poison_v8f16(<8 x half> %x, <8 x half> %y) {
i32 poison, i32 poison, i32 poison, i32 poison>
ret <8 x half> %res
}
+
+define <4 x float> @promote_low_v4f32(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32:
+; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fpext <4 x half> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+define <4 x float> @promote_low_v4f32_2(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32_2:
+; CHECK: .functype promote_low_v4f32_2 (v128) -> (v128)
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = fpext <8 x half> %x to <8 x float>
+ %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4bc52a0c..57af1daad0226 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -935,4 +935,7 @@ main:
# CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02]
f16x8.convert_i16x8_u
+ # CHECK: f32x4.promote_low_f16x8 # encoding: [0xfd,0xcb,0x02]
+ f32x4.promote_low_f16x8
+
end_function
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the undef deprecator. |
; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}} | ||
; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0 | ||
; CHECK-NEXT: return $pop[[R]] | ||
%v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
%v = shufflevector <8 x half> %x, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0 | ||
; CHECK-NEXT: return $pop[[R]] | ||
%v = fpext <8 x half> %x to <8 x float> | ||
%a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
%a = shufflevector <8 x float> %v, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
5ca39c1
to
50ad443
Compare
No description provided.