Skip to content

Commit c40e288

Browse files
committed
Remove support for SVE vector lengths that are not powers of two.
Non-power-of-two (NPOT) SVE vector lengths are no longer allowed in AArch64, so remove the code that handles them, add assertions, and update tests.
1 parent 45e3920 commit c40e288

16 files changed

Lines changed: 531 additions & 3047 deletions

src/aarch64/logic-aarch64.cc

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7853,16 +7853,26 @@ LogicVRegister Simulator::matmul(VectorFormat vform_dst,
78537853
//
78547854
// Are stored in the input vector registers as:
78557855
//
7856-
// 3 2 1 0
7857-
// src1 = [ d | c | b | a ]
7858-
// src2 = [ D | B | C | A ]
7856+
// 3 2 1 0
7857+
// src1 = [ d | c | b | a ]
7858+
// src2 = [ D | B | C | A ] N.B. src2 is stored transposed (B and C swapped).
78597859
//
7860+
// Giving:
7861+
// 3 2 1 0
7862+
// result = [ w | z | y | x ]
7863+
//
7864+
// Where:
7865+
//
7866+
// x = (a * A) + (b * C) + srcdst[0] (srcdst[n] is lane n of the accumulator)
7867+
// y = (a * B) + (b * D) + srcdst[1]
7868+
// z = (c * A) + (d * C) + srcdst[2]
7869+
// w = (c * B) + (d * D) + srcdst[3]
78607870
template <typename T>
78617871
LogicVRegister Simulator::fmatmul(VectorFormat vform,
78627872
LogicVRegister srcdst,
78637873
const LogicVRegister& src1,
78647874
const LogicVRegister& src2) {
7865-
T result[kZRegMaxSizeInBytes / sizeof(T)];
7875+
T result[kZRegMaxSizeInBytes / sizeof(T)] = {};
78667876
int T_per_segment = 4;
78677877
int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
78687878
for (int seg = 0; seg < segment_count; seg++) {
@@ -7879,12 +7889,9 @@ LogicVRegister Simulator::fmatmul(VectorFormat vform,
78797889
}
78807890
}
78817891
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7882-
// Elements outside a multiple of 4T are set to zero. This happens only
7883-
// for double precision operations, when the VL is a multiple of 128 bits,
7884-
// but not a multiple of 256 bits.
7885-
T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7886-
srcdst.SetFloat<T>(vform, i, value);
7892+
srcdst.SetFloat<T>(vform, i, result[i]);
78877893
}
7894+
78887895
return srcdst;
78897896
}
78907897

src/aarch64/simulator-aarch64.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ void Simulator::ResetState() {
704704
void Simulator::SetVectorLengthInBits(unsigned vector_length) {
705705
VIXL_ASSERT((vector_length >= kZRegMinSize) &&
706706
(vector_length <= kZRegMaxSize));
707-
VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
707+
VIXL_ASSERT(IsPowerOf2(vector_length));
708708
vector_length_ = vector_length;
709709

710710
for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
@@ -13682,8 +13682,10 @@ void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
1368213682
SimVRegister& zm = ReadVRegister(instr->GetRm());
1368313683

1368413684
switch (form_hash_) {
13685-
case "fmmla_z_zzz_s"_h:
1368613685
case "fmmla_z_zzz_d"_h:
13686+
if (GetVectorLengthInBits() < 256) VisitUnimplemented(instr);
13687+
VIXL_FALLTHROUGH();
13688+
case "fmmla_z_zzz_s"_h:
1368713689
fmatmul(vform, zdn, zn, zm);
1368813690
break;
1368913691
default:

test/aarch64/test-assembler-sve-aarch64.cc

Lines changed: 57 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -569,11 +569,11 @@ TEST_SVE(sve_last_r) {
569569
ASSERT_EQUAL_64(0x1111111111111111, x20);
570570
ASSERT_EQUAL_64(0x1111111111111110, x21);
571571
break;
572-
case 384:
573-
ASSERT_EQUAL_64(0x000000000000003f, x2);
572+
case 512:
573+
ASSERT_EQUAL_64(0x000000000000004f, x2);
574574
ASSERT_EQUAL_64(0x0000000000001118, x10);
575-
ASSERT_EQUAL_64(0x000000001111111b, x13);
576-
ASSERT_EQUAL_64(0x000000001111111b, x18);
575+
ASSERT_EQUAL_64(0x000000001111111f, x13);
576+
ASSERT_EQUAL_64(0x000000001111111f, x18);
577577
ASSERT_EQUAL_64(0x1111111111111112, x20);
578578
ASSERT_EQUAL_64(0x1111111111111113, x21);
579579
break;
@@ -653,11 +653,11 @@ TEST_SVE(sve_last_v) {
653653
ASSERT_EQUAL_128(0, 0x1111111111111111, q20);
654654
ASSERT_EQUAL_128(0, 0x1111111111111110, q21);
655655
break;
656-
case 384:
657-
ASSERT_EQUAL_128(0, 0x000000000000003f, q2);
656+
case 512:
657+
ASSERT_EQUAL_128(0, 0x000000000000004f, q2);
658658
ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
659-
ASSERT_EQUAL_128(0, 0x000000001111111b, q13);
660-
ASSERT_EQUAL_128(0, 0x000000001111111b, q18);
659+
ASSERT_EQUAL_128(0, 0x000000001111111f, q13);
660+
ASSERT_EQUAL_128(0, 0x000000001111111f, q18);
661661
ASSERT_EQUAL_128(0, 0x1111111111111112, q20);
662662
ASSERT_EQUAL_128(0, 0x1111111111111113, q21);
663663
break;
@@ -741,7 +741,7 @@ TEST_SVE(sve_clast_r) {
741741
case 128:
742742
ASSERT_EQUAL_64(0x0000000000001116, x10);
743743
break;
744-
case 384:
744+
case 512:
745745
ASSERT_EQUAL_64(0x0000000000001118, x10);
746746
break;
747747
case 2048:
@@ -819,7 +819,7 @@ TEST_SVE(sve_clast_v) {
819819
case 128:
820820
ASSERT_EQUAL_128(0, 0x0000000000001116, q10);
821821
break;
822-
case 384:
822+
case 512:
823823
ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
824824
break;
825825
case 2048:
@@ -916,7 +916,7 @@ TEST_SVE(sve_clast_z) {
916916
case 128:
917917
ASSERT_EQUAL_SVE(z10_expected_vl128, z10.VnD());
918918
break;
919-
case 384:
919+
case 512:
920920
case 2048:
921921
ASSERT_EQUAL_SVE(z10_expected_vl_long, z10.VnD());
922922
break;
@@ -20709,74 +20709,56 @@ void Test_sve_fmatmul(Test* config) {
2070920709
if (CAN_RUN()) {
2071020710
RUN();
2071120711

20712-
int vl = core.GetSVELaneCount(kBRegSize) * 8;
20713-
if (vl >= 256) {
20712+
int vl = core.GetSVELaneCount(kDRegSize);
20713+
if (vl >= 4) { // VL256 or longer.
2071420714
ASSERT_EQUAL_SVE(z1, z2);
2071520715
ASSERT_EQUAL_SVE(z4, z5);
2071620716

20717-
switch (vl) {
20718-
case 256:
20719-
case 384: {
20720-
// All results are 4.0 (1 * 1 + 2). Results for elements beyond a VL
20721-
// that's a multiple of 256 bits should be zero.
20722-
uint64_t z1_expected[] = {0x0000000000000000,
20723-
0x0000000000000000,
20724-
0x4010000000000000,
20725-
0x4010000000000000,
20726-
0x4010000000000000,
20727-
0x4010000000000000};
20728-
ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
20729-
20730-
uint64_t z4_expected[] = {0x0000000000000000,
20731-
0x0000000000000000,
20732-
0x4018000000000000, // 6.0
20733-
0x4022000000000000, // 9.0
20734-
0x4018000000000000, // 6.0
20735-
0x4054400000000000}; // 81.0
20736-
ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
20737-
break;
20738-
}
20739-
case 2048: {
20740-
uint64_t z1_expected[] =
20741-
{0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20742-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20743-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20744-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20745-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20746-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20747-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20748-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20749-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20750-
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20751-
0x4010000000000000, 0x4010000000000000};
20752-
ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
20753-
20754-
uint64_t z4_expected[] = {
20755-
0x40cb690000000000, 0x40c9728000000000, 0x40c9710000000000,
20756-
0x40c79e8000000000, 0x40c41f0000000000, 0x40c2708000000000,
20757-
0x40c26f0000000000, 0x40c0e48000000000, 0x40bbea0000000000,
20758-
0x40b91d0000000000, 0x40b91a0000000000, 0x40b6950000000000,
20759-
0x40b1d60000000000, 0x40af320000000000, 0x40af2c0000000000,
20760-
0x40ab420000000000, 0x40a4040000000000, 0x40a0aa0000000000,
20761-
0x40a0a40000000000, 0x409bb40000000000, 0x4091b80000000000,
20762-
0x408a880000000000, 0x408a700000000000, 0x4083c80000000000,
20763-
0x4071a00000000000, 0x4061a00000000000, 0x4061400000000000,
20764-
0x4051400000000000, 0x4018000000000000, 0x4022000000000000,
20765-
0x4018000000000000, 0x4054400000000000,
20766-
};
20767-
ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
20768-
break;
20769-
}
20770-
default:
20771-
printf("WARNING: Some tests skipped due to unexpected VL.\n");
20772-
break;
20773-
}
20717+
// All results are 4.0:
20718+
// z0 z0 z1
20719+
// (1 1)(1 1) + (2 2) = (4 4)
20720+
// (1 1)(1 1) (2 2) (4 4)
20721+
uint64_t z1_expected[] =
20722+
{0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20723+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20724+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20725+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20726+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20727+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20728+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20729+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20730+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20731+
0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20732+
0x4010000000000000, 0x4010000000000000};
20733+
ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
20734+
20735+
// First (highest z4_expected index) multiplications are:
20736+
// z4 z4 z4
20737+
// (-8 -5)(-8 -2) + (-8 -5) = (81 6)
20738+
// (-2 1)(-5 1) (-2 1) ( 9 6)
20739+
//
20740+
// ( 4 7)( 4 10) + ( 4 7) = ( 69 138)
20741+
// (10 13)( 7 13) (10 13) (141 282)
20742+
uint64_t z4_expected[] = {
20743+
0x40cb690000000000, 0x40c9728000000000, 0x40c9710000000000,
20744+
0x40c79e8000000000, 0x40c41f0000000000, 0x40c2708000000000,
20745+
0x40c26f0000000000, 0x40c0e48000000000, 0x40bbea0000000000,
20746+
0x40b91d0000000000, 0x40b91a0000000000, 0x40b6950000000000,
20747+
0x40b1d60000000000, 0x40af320000000000, 0x40af2c0000000000,
20748+
0x40ab420000000000, 0x40a4040000000000, 0x40a0aa0000000000,
20749+
0x40a0a40000000000, 0x409bb40000000000, 0x4091b80000000000,
20750+
0x408a880000000000, 0x408a700000000000, 0x4083c80000000000,
20751+
0x4071a00000000000, 0x4061a00000000000, 0x4061400000000000,
20752+
0x4051400000000000, 0x4018000000000000, 0x4022000000000000,
20753+
0x4018000000000000, 0x4054400000000000,
20754+
};
20755+
ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
2077420756
}
2077520757
}
2077620758
}
2077720759
Test* test_sve_fmatmul_list[] =
2077820760
{Test::MakeSVETest(256, "AARCH64_ASM_sve_fmatmul_vl256", &Test_sve_fmatmul),
20779-
Test::MakeSVETest(384, "AARCH64_ASM_sve_fmatmul_vl384", &Test_sve_fmatmul),
20761+
Test::MakeSVETest(512, "AARCH64_ASM_sve_fmatmul_vl512", &Test_sve_fmatmul),
2078020762
Test::MakeSVETest(2048,
2078120763
"AARCH64_ASM_sve_fmatmul_vl2048",
2078220764
&Test_sve_fmatmul)};
@@ -20856,57 +20838,15 @@ void Test_sve_ld1ro(Test* config) {
2085620838
ASSERT_EQUAL_SVE(z4, z5);
2085720839
ASSERT_EQUAL_SVE(z6, z7);
2085820840

20859-
switch (vl) {
20860-
case 256:
20861-
case 2048: {
20862-
// Check the result of the rotate/eor sequence.
20863-
uint64_t expected_z9[] = {0, 0};
20864-
ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
20865-
break;
20866-
}
20867-
case 384: {
20868-
// For non-multiple-of-256 VL, the top 128-bits must be zero, which
20869-
// breaks the rotate/eor sequence. Check the results explicitly.
20870-
uint64_t z0_expected[] = {0x0000000000000000,
20871-
0x0000000000000000,
20872-
0x0000000000000000,
20873-
0x0000000000000000,
20874-
0x0000000000000000,
20875-
0x000d000b00090007};
20876-
uint64_t z2_expected[] = {0x0000000000000000,
20877-
0x0000000000000000,
20878-
0x868584838281807f,
20879-
0x7e7d7c7b7a797877,
20880-
0x767574737271706f,
20881-
0x6e6d6c6b6a696867};
20882-
uint64_t z4_expected[] = {0x0000000000000000,
20883-
0x0000000000000000,
20884-
0xe6e5e4e3e2e1e0df,
20885-
0xdedddcdbdad9d8d7,
20886-
0xd6d5d4d3d2d1d0cf,
20887-
0xcecdcccbcac9c8c7};
20888-
uint64_t z6_expected[] = {0x0000000000000000,
20889-
0x0000000000000000,
20890-
0xc6c5c4c3c2c1c0bf,
20891-
0xbebdbcbbbab9b8b7,
20892-
0xb6b5b4b3b2b1b0af,
20893-
0xaeadacabaaa9a8a7};
20894-
ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
20895-
ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
20896-
ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
20897-
ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
20898-
break;
20899-
}
20900-
default:
20901-
printf("WARNING: Some tests skipped due to unexpected VL.\n");
20902-
break;
20903-
}
20841+
// Check the result of the rotate/eor sequence.
20842+
uint64_t expected_z9[] = {0, 0};
20843+
ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
2090420844
}
2090520845
}
2090620846
}
2090720847
Test* test_sve_ld1ro_list[] =
2090820848
{Test::MakeSVETest(256, "AARCH64_ASM_sve_ld1ro_vl256", &Test_sve_ld1ro),
20909-
Test::MakeSVETest(384, "AARCH64_ASM_sve_ld1ro_vl384", &Test_sve_ld1ro),
20849+
Test::MakeSVETest(512, "AARCH64_ASM_sve_ld1ro_vl512", &Test_sve_ld1ro),
2091020850
Test::MakeSVETest(2048, "AARCH64_ASM_sve_ld1ro_vl2048", &Test_sve_ld1ro)};
2091120851
#endif
2091220852

test/aarch64/test-trace-aarch64.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3027,7 +3027,7 @@ static void TraceTestHelper(bool coloured_trace,
30273027
simulator.SetTraceParameters(trace_parameters);
30283028
simulator.SilenceExclusiveAccessWarning();
30293029

3030-
const int vl_in_bytes = 5 * kZRegMinSizeInBytes;
3030+
const int vl_in_bytes = 4 * kZRegMinSizeInBytes;
30313031
const int vl_in_bits = vl_in_bytes * kBitsPerByte;
30323032
const int pl_in_bits = vl_in_bits / kZRegBitsPerPRegBit;
30333033
simulator.SetVectorLengthInBits(vl_in_bits);

test/aarch64/test-utils-aarch64.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -618,9 +618,9 @@ void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst);
618618
"AARCH64_" type "_" #name \
619619
"_vl128", \
620620
&Test##name), \
621-
Test::MakeSVETest(384, \
621+
Test::MakeSVETest(512, \
622622
"AARCH64_" type "_" #name \
623-
"_vl384", \
623+
"_vl512", \
624624
&Test##name), \
625625
Test::MakeSVETest(2048, \
626626
"AARCH64_" type "_" #name \

0 commit comments

Comments
 (0)