diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 9940c352f8ab21..4c8b2db4ed3778 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2636,6 +2636,17 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) emit->emitIns_Add_Add_Tls_Reloc(attr, targetReg, op1->GetRegNum(), op2->AsIntCon()->IconValue()); return; } + else if (oper == GT_ADD && op1->isContained() && op1->OperIsHWIntrinsic(NI_Sve_GetActiveElementCount)) + { + const GenTreeHWIntrinsic* elementCountNode = op1->AsHWIntrinsic(); + GenTree* elementCountChildNode = elementCountNode->Op(1); + + regNumber reg1 = op2->GetRegNum(); + regNumber reg2 = elementCountChildNode->GetRegNum(); + + emit->emitInsSve_R_R(INS_sve_incp, EA_8BYTE, reg1, reg2, INS_OPTS_SCALABLE_B); + return; + } instruction ins = genGetInsForOper(tree->OperGet(), targetType); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8754c2b24df809..1ae56e93e9310d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20494,7 +20494,8 @@ bool GenTree::isContainableHWIntrinsic() const } } #elif defined(TARGET_ARM64) - return (AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect); + return (AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect || + AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_GetActiveElementCount); #else return false; #endif // TARGET_XARCH diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 61de442555b8fc..7bb6bb044b4473 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7299,6 +7299,11 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) { return next; } + + if (TryContainingGetActiveElementCount(node)) + { + return next; + } } #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index fb8c06ba6f6930..87f8761ad5c7ba 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -109,6 +109,7 @@ class Lowering final : 
public Phase
     bool TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next);
     bool TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next);
     bool TryContainingCselOp(GenTreeHWIntrinsic* parentNode, GenTreeHWIntrinsic* childNode);
+    bool TryContainingGetActiveElementCount(GenTreeOp* node);
 #endif
 #ifdef TARGET_RISCV64
     bool TryLowerShiftAddToShxadd(GenTreeOp* tree, GenTree** next);
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 47fbb10db498cb..91cd59941e8a7d 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -3738,6 +3738,70 @@ bool Lowering::TryContainingCselOp(GenTreeHWIntrinsic* parentNode, GenTreeHWIntr
     return canContain;
 }
 
+//------------------------------------------------------------------------
+// TryContainingGetActiveElementCount: Try to contain an Sve.GetActiveElementCount node
+//    that is the first operand of an ADD.
+//
+// Arguments:
+//    node - the GT_ADD node whose first operand is examined
+//
+// Return Value:
+//    true if op1 was marked as contained in 'node'; false otherwise, in which case
+//    nothing is modified.
+//
+// Notes:
+//    op1 is contained only when both of its operands are the same kind of HW intrinsic
+//    and the second operands of those intrinsics carry the same, valid liberal value
+//    number. Only op1 of the ADD is considered; op2 is never inspected.
+//
+bool Lowering::TryContainingGetActiveElementCount(GenTreeOp* node)
+{
+    assert(node->OperIs(GT_ADD));
+
+    GenTree* op1 = node->gtGetOp1();
+    if (!op1->OperIsHWIntrinsic(NI_Sve_GetActiveElementCount))
+    {
+        return false;
+    }
+
+    assert(op1->canBeContained());
+
+    const GenTreeHWIntrinsic* elementCountNode = op1->AsHWIntrinsic();
+    GenTree*                  maskOp1          = elementCountNode->Op(1);
+    GenTree*                  maskOp2          = elementCountNode->Op(2);
+
+    // Nothing above guarantees that these operands are HW intrinsics, so check before
+    // casting them with AsHWIntrinsic().
+    if (!maskOp1->OperIsHWIntrinsic() || !maskOp2->OperIsHWIntrinsic())
+    {
+        return false;
+    }
+
+    const GenTreeHWIntrinsic* maskNode1 = maskOp1->AsHWIntrinsic();
+    const GenTreeHWIntrinsic* maskNode2 = maskOp2->AsHWIntrinsic();
+
+    // Matching value numbers on the second operands only say something about the masks
+    // when both come from the same intrinsic, and Op(2) needs at least two operands.
+    if ((maskNode1->GetHWIntrinsicId() != maskNode2->GetHWIntrinsicId()) ||
+        (maskNode1->GetOperandCount() < 2) || (maskNode2->GetOperandCount() < 2))
+    {
+        return false;
+    }
+
+    const ValueNum vn1 = maskNode1->Op(2)->GetVN(VNK_Liberal);
+    const ValueNum vn2 = maskNode2->Op(2)->GetVN(VNK_Liberal);
+
+    // Two NoVN values compare equal without saying anything about the operands, so a
+    // missing value number must not be treated as a match.
+    if ((vn1 == ValueNumStore::NoVN) || (vn1 != vn2))
+    {
+        return false;
+    }
+
+    MakeSrcContained(node, op1);
+    return true;
+}
+
 #endif // TARGET_ARM64
 
 //------------------------------------------------------------------------
diff --git a/src/tests/JIT/opt/SVE/active_element_count.cs b/src/tests/JIT/opt/SVE/active_element_count.cs
new file mode 100644
index 00000000000000..2d10ae15b4a49b
--- /dev/null
+++ b/src/tests/JIT/opt/SVE/active_element_count.cs
@@ -0,0 +1,85 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using Xunit;
+
+/// <summary>
+/// Checks Sve.GetActiveElementCount results that are added to a running total, for identical and
+/// distinct arguments. TestEntryPoint returns 100 on success or without SVE, and 101 on failure.
+/// </summary>
+public class TestGetActiveElementCount
+{
+    [Fact]
+    public static int TestEntryPoint()
+    {
+        if (!Sve.IsSupported)
+        {
+            return 100;
+        }
+
+        // Every lane of the vectors below is non-zero, so every lane is expected to be counted.
+        // Vector<T>.Count is used instead of the literals 16 and 2 to avoid assuming 128 bits.
+        ulong byteLanes = (ulong)Vector<byte>.Count;
+        ulong doubleLanes = (ulong)Vector<double>.Count;
+
+        Vector<byte> v0 = Vector.Create<byte>(1);
+        Vector<byte> v1 = Vector.Create<byte>(4);
+        Vector<double> vDouble0 = Vector.Create<double>(1);
+        Vector<double> vDouble1 = Vector.Create<double>(4);
+
+        bool pass = DifferentVector(v0, v1, 0) == byteLanes;
+        pass &= SameVector(v0, 0) == byteLanes;
+        pass &= DifferentVectorDouble(vDouble0, vDouble1, 0) == doubleLanes;
+        pass &= SameVectorDouble(vDouble0, 0) == doubleLanes;
+
+        return pass ? 100 : 101;
+    }
+
+    // Adds the active element count of two different byte vectors to 'total'.
+    // The disassembly checks in the body expect a cntp followed by a separate add.
+    // NoInlining keeps this pattern in a method of its own.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static ulong DifferentVector(Vector<byte> v0, Vector<byte> v1, ulong total)
+    {
+        //ARM64-FULL-LINE: cntp {{x[0-9]+}}, {{p[0-9]+}}, {{p[0-9]+}}.b
+        //ARM64-FULL-LINE: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+        total += Sve.GetActiveElementCount(v0, v1);
+        return total;
+    }
+
+    // Adds the active element count to 'total' with one byte vector passed as both arguments.
+    // The disassembly check in the body expects a single incp.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static ulong SameVector(Vector<byte> v0, ulong total)
+    {
+        //ARM64-FULL-LINE: incp {{x[0-9]+}}, {{p[0-9]+}}.b
+        total += Sve.GetActiveElementCount(v0, v0);
+        return total;
+    }
+
+    // Adds the active element count of two different double vectors to 'total'.
+    // The disassembly checks in the body expect a cntp followed by a separate add.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static ulong DifferentVectorDouble(Vector<double> v0, Vector<double> v1, ulong total)
+    {
+        //ARM64-FULL-LINE: cntp {{x[0-9]+}}, {{p[0-9]+}}, {{p[0-9]+}}.b
+        //ARM64-FULL-LINE: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+        total += Sve.GetActiveElementCount(v0, v1);
+        return total;
+    }
+
+    // Adds the active element count to 'total' with one double vector passed as both arguments.
+    // The disassembly check in the body expects a single incp.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static ulong SameVectorDouble(Vector<double> v0, ulong total)
+    {
+        //ARM64-FULL-LINE: incp {{x[0-9]+}}, {{p[0-9]+}}.b
+        total += Sve.GetActiveElementCount(v0, v0);
+        return total;
+    }
+}
diff --git a/src/tests/JIT/opt/SVE/active_element_count.csproj
b/src/tests/JIT/opt/SVE/active_element_count.csproj new file mode 100644 index 00000000000000..1b8a006b01e7c7 --- /dev/null +++ b/src/tests/JIT/opt/SVE/active_element_count.csproj @@ -0,0 +1,10 @@ + + + true + True + $(NoWarn),SYSLIB5003 + + + + +