@@ -569,11 +569,11 @@ TEST_SVE(sve_last_r) {
569569 ASSERT_EQUAL_64(0x1111111111111111, x20);
570570 ASSERT_EQUAL_64(0x1111111111111110, x21);
571571 break;
572- case 384 :
573- ASSERT_EQUAL_64(0x000000000000003f , x2);
572+ case 512 :
573+ ASSERT_EQUAL_64(0x000000000000004f , x2);
574574 ASSERT_EQUAL_64(0x0000000000001118, x10);
575- ASSERT_EQUAL_64(0x000000001111111b , x13);
576- ASSERT_EQUAL_64(0x000000001111111b , x18);
575+ ASSERT_EQUAL_64(0x000000001111111f , x13);
576+ ASSERT_EQUAL_64(0x000000001111111f , x18);
577577 ASSERT_EQUAL_64(0x1111111111111112, x20);
578578 ASSERT_EQUAL_64(0x1111111111111113, x21);
579579 break;
@@ -653,11 +653,11 @@ TEST_SVE(sve_last_v) {
653653 ASSERT_EQUAL_128(0, 0x1111111111111111, q20);
654654 ASSERT_EQUAL_128(0, 0x1111111111111110, q21);
655655 break;
656- case 384 :
657- ASSERT_EQUAL_128(0, 0x000000000000003f , q2);
656+ case 512 :
657+ ASSERT_EQUAL_128(0, 0x000000000000004f , q2);
658658 ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
659- ASSERT_EQUAL_128(0, 0x000000001111111b , q13);
660- ASSERT_EQUAL_128(0, 0x000000001111111b , q18);
659+ ASSERT_EQUAL_128(0, 0x000000001111111f , q13);
660+ ASSERT_EQUAL_128(0, 0x000000001111111f , q18);
661661 ASSERT_EQUAL_128(0, 0x1111111111111112, q20);
662662 ASSERT_EQUAL_128(0, 0x1111111111111113, q21);
663663 break;
@@ -741,7 +741,7 @@ TEST_SVE(sve_clast_r) {
741741 case 128:
742742 ASSERT_EQUAL_64(0x0000000000001116, x10);
743743 break;
744- case 384 :
744+ case 512 :
745745 ASSERT_EQUAL_64(0x0000000000001118, x10);
746746 break;
747747 case 2048:
@@ -819,7 +819,7 @@ TEST_SVE(sve_clast_v) {
819819 case 128:
820820 ASSERT_EQUAL_128(0, 0x0000000000001116, q10);
821821 break;
822- case 384 :
822+ case 512 :
823823 ASSERT_EQUAL_128(0, 0x0000000000001118, q10);
824824 break;
825825 case 2048:
@@ -916,7 +916,7 @@ TEST_SVE(sve_clast_z) {
916916 case 128:
917917 ASSERT_EQUAL_SVE(z10_expected_vl128, z10.VnD());
918918 break;
919- case 384 :
919+ case 512 :
920920 case 2048:
921921 ASSERT_EQUAL_SVE(z10_expected_vl_long, z10.VnD());
922922 break;
@@ -20709,74 +20709,56 @@ void Test_sve_fmatmul(Test* config) {
2070920709 if (CAN_RUN()) {
2071020710 RUN();
2071120711
20712- int vl = core.GetSVELaneCount(kBRegSize) * 8 ;
20713- if (vl >= 256 ) {
20712+ int vl = core.GetSVELaneCount(kDRegSize) ;
20713+ if (vl >= 4 ) { // VL256 or longer.
2071420714 ASSERT_EQUAL_SVE(z1, z2);
2071520715 ASSERT_EQUAL_SVE(z4, z5);
2071620716
20717- switch (vl) {
20718- case 256:
20719- case 384: {
20720- // All results are 4.0 (1 * 1 + 2). Results for elements beyond a VL
20721- // that's a multiple of 256 bits should be zero.
20722- uint64_t z1_expected[] = {0x0000000000000000,
20723- 0x0000000000000000,
20724- 0x4010000000000000,
20725- 0x4010000000000000,
20726- 0x4010000000000000,
20727- 0x4010000000000000};
20728- ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
20729-
20730- uint64_t z4_expected[] = {0x0000000000000000,
20731- 0x0000000000000000,
20732- 0x4018000000000000, // 6.0
20733- 0x4022000000000000, // 9.0
20734- 0x4018000000000000, // 6.0
20735- 0x4054400000000000}; // 81.0
20736- ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
20737- break;
20738- }
20739- case 2048: {
20740- uint64_t z1_expected[] =
20741- {0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20742- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20743- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20744- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20745- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20746- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20747- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20748- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20749- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20750- 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20751- 0x4010000000000000, 0x4010000000000000};
20752- ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
20753-
20754- uint64_t z4_expected[] = {
20755- 0x40cb690000000000, 0x40c9728000000000, 0x40c9710000000000,
20756- 0x40c79e8000000000, 0x40c41f0000000000, 0x40c2708000000000,
20757- 0x40c26f0000000000, 0x40c0e48000000000, 0x40bbea0000000000,
20758- 0x40b91d0000000000, 0x40b91a0000000000, 0x40b6950000000000,
20759- 0x40b1d60000000000, 0x40af320000000000, 0x40af2c0000000000,
20760- 0x40ab420000000000, 0x40a4040000000000, 0x40a0aa0000000000,
20761- 0x40a0a40000000000, 0x409bb40000000000, 0x4091b80000000000,
20762- 0x408a880000000000, 0x408a700000000000, 0x4083c80000000000,
20763- 0x4071a00000000000, 0x4061a00000000000, 0x4061400000000000,
20764- 0x4051400000000000, 0x4018000000000000, 0x4022000000000000,
20765- 0x4018000000000000, 0x4054400000000000,
20766- };
20767- ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
20768- break;
20769- }
20770- default:
20771- printf("WARNING: Some tests skipped due to unexpected VL.\n");
20772- break;
20773- }
20717+ // All results are 4.0:
20718+ // z0 z0 z1
20719+ // (1 1)(1 1) + (2 2) = (4 4)
20720+ // (1 1)(1 1) (2 2) (4 4)
20721+ uint64_t z1_expected[] =
20722+ {0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20723+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20724+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20725+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20726+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20727+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20728+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20729+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20730+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20731+ 0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
20732+ 0x4010000000000000, 0x4010000000000000};
20733+ ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
20734+
20735+ // First (highest z4_expected index) multiplications are:
20736+ // z4 z4 z4
20737+ // (-8 -5)(-8 -2) + (-8 -5) = (81 6)
20738+ // (-2 1)(-5 1) (-2 1) ( 9 6)
20739+ //
20740+ // ( 4 7)( 4 10) + ( 4 7) = ( 69 138)
20741+ // (10 13)( 7 13) (10 13) (141 282)
20742+ uint64_t z4_expected[] = {
20743+ 0x40cb690000000000, 0x40c9728000000000, 0x40c9710000000000,
20744+ 0x40c79e8000000000, 0x40c41f0000000000, 0x40c2708000000000,
20745+ 0x40c26f0000000000, 0x40c0e48000000000, 0x40bbea0000000000,
20746+ 0x40b91d0000000000, 0x40b91a0000000000, 0x40b6950000000000,
20747+ 0x40b1d60000000000, 0x40af320000000000, 0x40af2c0000000000,
20748+ 0x40ab420000000000, 0x40a4040000000000, 0x40a0aa0000000000,
20749+ 0x40a0a40000000000, 0x409bb40000000000, 0x4091b80000000000,
20750+ 0x408a880000000000, 0x408a700000000000, 0x4083c80000000000,
20751+ 0x4071a00000000000, 0x4061a00000000000, 0x4061400000000000,
20752+ 0x4051400000000000, 0x4018000000000000, 0x4022000000000000,
20753+ 0x4018000000000000, 0x4054400000000000,
20754+ };
20755+ ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
2077420756 }
2077520757 }
2077620758}
2077720759Test* test_sve_fmatmul_list[] =
2077820760 {Test::MakeSVETest(256, "AARCH64_ASM_sve_fmatmul_vl256", &Test_sve_fmatmul),
20779- Test::MakeSVETest(384 , "AARCH64_ASM_sve_fmatmul_vl384 ", &Test_sve_fmatmul),
20761+ Test::MakeSVETest(512 , "AARCH64_ASM_sve_fmatmul_vl512 ", &Test_sve_fmatmul),
2078020762 Test::MakeSVETest(2048,
2078120763 "AARCH64_ASM_sve_fmatmul_vl2048",
2078220764 &Test_sve_fmatmul)};
@@ -20856,57 +20838,15 @@ void Test_sve_ld1ro(Test* config) {
2085620838 ASSERT_EQUAL_SVE(z4, z5);
2085720839 ASSERT_EQUAL_SVE(z6, z7);
2085820840
20859- switch (vl) {
20860- case 256:
20861- case 2048: {
20862- // Check the result of the rotate/eor sequence.
20863- uint64_t expected_z9[] = {0, 0};
20864- ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
20865- break;
20866- }
20867- case 384: {
20868- // For non-multiple-of-256 VL, the top 128-bits must be zero, which
20869- // breaks the rotate/eor sequence. Check the results explicitly.
20870- uint64_t z0_expected[] = {0x0000000000000000,
20871- 0x0000000000000000,
20872- 0x0000000000000000,
20873- 0x0000000000000000,
20874- 0x0000000000000000,
20875- 0x000d000b00090007};
20876- uint64_t z2_expected[] = {0x0000000000000000,
20877- 0x0000000000000000,
20878- 0x868584838281807f,
20879- 0x7e7d7c7b7a797877,
20880- 0x767574737271706f,
20881- 0x6e6d6c6b6a696867};
20882- uint64_t z4_expected[] = {0x0000000000000000,
20883- 0x0000000000000000,
20884- 0xe6e5e4e3e2e1e0df,
20885- 0xdedddcdbdad9d8d7,
20886- 0xd6d5d4d3d2d1d0cf,
20887- 0xcecdcccbcac9c8c7};
20888- uint64_t z6_expected[] = {0x0000000000000000,
20889- 0x0000000000000000,
20890- 0xc6c5c4c3c2c1c0bf,
20891- 0xbebdbcbbbab9b8b7,
20892- 0xb6b5b4b3b2b1b0af,
20893- 0xaeadacabaaa9a8a7};
20894- ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
20895- ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
20896- ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
20897- ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
20898- break;
20899- }
20900- default:
20901- printf("WARNING: Some tests skipped due to unexpected VL.\n");
20902- break;
20903- }
20841+ // Check the result of the rotate/eor sequence.
20842+ uint64_t expected_z9[] = {0, 0};
20843+ ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
2090420844 }
2090520845 }
2090620846}
2090720847Test* test_sve_ld1ro_list[] =
2090820848 {Test::MakeSVETest(256, "AARCH64_ASM_sve_ld1ro_vl256", &Test_sve_ld1ro),
20909- Test::MakeSVETest(384 , "AARCH64_ASM_sve_ld1ro_vl384 ", &Test_sve_ld1ro),
20849+ Test::MakeSVETest(512 , "AARCH64_ASM_sve_ld1ro_vl512 ", &Test_sve_ld1ro),
2091020850 Test::MakeSVETest(2048, "AARCH64_ASM_sve_ld1ro_vl2048", &Test_sve_ld1ro)};
2091120851#endif
2091220852
0 commit comments