@@ -655,12 +655,150 @@ DEF_ISEL(LDAR_LR64_LDSTEXCL) = LoadAcquire<R64W, M64>;
655
655
656
656
namespace {
657
657
658
+ #define MAKE_LD1 (esize ) \
659
+ template <typename S> \
660
+ DEF_SEM (LD1_SINGLE_ ## esize, V128W dst1, S src) { \
661
+ auto elems1 = UReadV ## esize (src); \
662
+ UWriteV ## esize (dst1, elems1); \
663
+ return memory; \
664
+ }
665
+
666
+ MAKE_LD1 (8 )
667
+ MAKE_LD1 (16 )
668
+ MAKE_LD1 (32 )
669
+ MAKE_LD1 (64 )
670
+
671
+ #undef MAKE_LD1
672
+
673
+ } // namespace
674
+
675
// Instruction-selection bindings for the single-register LD1 forms: each
// LD1_ASISDLSE_R1_1V_<arrangement> name is bound to LD1_SINGLE_<lane bits>,
// instantiated with MV64 for the 8B/4H/2S/1D arrangements and with MV128 for
// the 16B/8H/4S/2D arrangements.
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_8B) = LD1_SINGLE_8<MV64>;
676
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_16B) = LD1_SINGLE_8<MV128>;
677
+
678
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_4H) = LD1_SINGLE_16<MV64>;
679
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_8H) = LD1_SINGLE_16<MV128>;
680
+
681
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_2S) = LD1_SINGLE_32<MV64>;
682
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_4S) = LD1_SINGLE_32<MV128>;
683
+
684
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_1D) = LD1_SINGLE_64<MV64>;
685
+ DEF_ISEL (LD1_ASISDLSE_R1_1V_2D) = LD1_SINGLE_64<MV128>;
686
+
687
+ namespace {
688
+
689
+ #define MAKE_LD1 (esize ) \
690
+ template <typename S> \
691
+ DEF_SEM (LD1_PAIR_ ## esize, V128W dst1, V128W dst2, S src) { \
692
+ auto elems1 = UReadV ## esize (src); \
693
+ auto elems2 = UReadV ## esize (GetElementPtr (src, 1U )); \
694
+ UWriteV ## esize (dst1, elems1); \
695
+ UWriteV ## esize (dst2, elems2); \
696
+ return memory; \
697
+ }
698
+
699
+ MAKE_LD1 (8 )
700
+ MAKE_LD1 (16 )
701
+ MAKE_LD1 (32 )
702
+ MAKE_LD1 (64 )
703
+
704
+ #undef MAKE_LD1
705
+
706
+ } // namespace
707
+
708
// Instruction-selection bindings for the two-register LD1 forms: each
// LD1_ASISDLSE_R2_2V_<arrangement> name is bound to LD1_PAIR_<lane bits>,
// instantiated with MV64 for the 8B/4H/2S/1D arrangements and with MV128 for
// the 16B/8H/4S/2D arrangements.
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_8B) = LD1_PAIR_8<MV64>;
709
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_16B) = LD1_PAIR_8<MV128>;
710
+
711
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_4H) = LD1_PAIR_16<MV64>;
712
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_8H) = LD1_PAIR_16<MV128>;
713
+
714
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_2S) = LD1_PAIR_32<MV64>;
715
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_4S) = LD1_PAIR_32<MV128>;
716
+
717
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_1D) = LD1_PAIR_64<MV64>;
718
+ DEF_ISEL (LD1_ASISDLSE_R2_2V_2D) = LD1_PAIR_64<MV128>;
719
+
720
+
721
+ namespace {
722
+
723
+ #define MAKE_LD1 (esize ) \
724
+ template <typename S> \
725
+ DEF_SEM (LD1_TRIPLE_ ## esize, V128W dst1, V128W dst2, \
726
+ V128W dst3, S src) { \
727
+ auto elems1 = UReadV ## esize (src); \
728
+ auto elems2 = UReadV ## esize (GetElementPtr (src, 1U )); \
729
+ auto elems3 = UReadV ## esize (GetElementPtr (src, 2U )); \
730
+ UWriteV ## esize (dst1, elems1); \
731
+ UWriteV ## esize (dst2, elems2); \
732
+ UWriteV ## esize (dst3, elems3); \
733
+ return memory; \
734
+ }
735
+
736
+ MAKE_LD1 (8 )
737
+ MAKE_LD1 (16 )
738
+ MAKE_LD1 (32 )
739
+ MAKE_LD1 (64 )
740
+
741
+ #undef MAKE_LD1
742
+
743
+ } // namespace
744
+
745
// Instruction-selection bindings for the three-register LD1 forms: each
// LD1_ASISDLSE_R3_3V_<arrangement> name is bound to LD1_TRIPLE_<lane bits>,
// instantiated with MV64 for the 8B/4H/2S/1D arrangements and with MV128 for
// the 16B/8H/4S/2D arrangements.
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_8B) = LD1_TRIPLE_8<MV64>;
746
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_16B) = LD1_TRIPLE_8<MV128>;
747
+
748
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_4H) = LD1_TRIPLE_16<MV64>;
749
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_8H) = LD1_TRIPLE_16<MV128>;
750
+
751
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_2S) = LD1_TRIPLE_32<MV64>;
752
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_4S) = LD1_TRIPLE_32<MV128>;
753
+
754
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_1D) = LD1_TRIPLE_64<MV64>;
755
+ DEF_ISEL (LD1_ASISDLSE_R3_3V_2D) = LD1_TRIPLE_64<MV128>;
756
+
757
+ namespace {
758
+
759
+ #define MAKE_LD1 (esize ) \
760
+ template <typename S> \
761
+ DEF_SEM (LD1_QUAD_ ## esize, V128W dst1, V128W dst2, \
762
+ V128W dst3, V128W dst4, S src) { \
763
+ auto elems1 = UReadV ## esize (src); \
764
+ auto elems2 = UReadV ## esize (GetElementPtr (src, 1U )); \
765
+ auto elems3 = UReadV ## esize (GetElementPtr (src, 2U )); \
766
+ auto elems4 = UReadV ## esize (GetElementPtr (src, 3U )); \
767
+ UWriteV ## esize (dst1, elems1); \
768
+ UWriteV ## esize (dst2, elems2); \
769
+ UWriteV ## esize (dst3, elems3); \
770
+ UWriteV ## esize (dst4, elems4); \
771
+ return memory; \
772
+ }
773
+
774
+ MAKE_LD1 (8 )
775
+ MAKE_LD1 (16 )
776
+ MAKE_LD1 (32 )
777
+ MAKE_LD1 (64 )
778
+
779
+ #undef MAKE_LD1
780
+
781
+ } // namespace
782
+
783
// Instruction-selection bindings for the four-register LD1 forms: each
// LD1_ASISDLSE_R4_4V_<arrangement> name is bound to LD1_QUAD_<lane bits>,
// instantiated with MV64 for the 8B/4H/2S/1D arrangements and with MV128 for
// the 16B/8H/4S/2D arrangements.
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_8B) = LD1_QUAD_8<MV64>;
784
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_16B) = LD1_QUAD_8<MV128>;
785
+
786
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_4H) = LD1_QUAD_16<MV64>;
787
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_8H) = LD1_QUAD_16<MV128>;
788
+
789
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_2S) = LD1_QUAD_32<MV64>;
790
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_4S) = LD1_QUAD_32<MV128>;
791
+
792
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_1D) = LD1_QUAD_64<MV64>;
793
+ DEF_ISEL (LD1_ASISDLSE_R4_4V_2D) = LD1_QUAD_64<MV128>;
794
+
795
+ namespace {
796
+
658
797
// Generates `LD1_SINGLE_POSTINDEX_<esize>`, the post-index variant of the
// single-register LD1 semantics. In this hunk the line marked `+` delegates
// the load to `LD1_SINGLE_<esize>` (replacing the inline read/write marked
// `-`); afterwards `addr_reg` is set to `Read(next_addr)` and `memory` is
// returned.
#define MAKE_LD1_POSTINDEX (esize ) \
659
798
template <typename S> \
660
799
DEF_SEM (LD1_SINGLE_POSTINDEX_ ## esize, V128W dst1, S src, \
661
800
R64W addr_reg, ADDR next_addr) { \
662
- auto elems1 = UReadV ## esize (src); \
663
- UWriteV ## esize (dst1, elems1); \
801
+ memory = LD1_SINGLE_ ## esize (memory, state, dst1, src); \
664
802
Write (addr_reg, Read (next_addr)); \
665
803
return memory; \
666
804
}
@@ -692,10 +830,7 @@ namespace {
692
830
template <typename S> \
693
831
DEF_SEM (LD1_PAIR_POSTINDEX_ ## esize, V128W dst1, V128W dst2, S src, \
694
832
R64W addr_reg, ADDR next_addr) { \
695
- auto elems1 = UReadV ## esize (src); \
696
- auto elems2 = UReadV ## esize (GetElementPtr (src, 1U )); \
697
- UWriteV ## esize (dst1, elems1); \
698
- UWriteV ## esize (dst2, elems2); \
833
+ memory = LD1_PAIR_ ## esize (memory, state, dst1, dst2, src); \
699
834
Write (addr_reg, Read (next_addr)); \
700
835
return memory; \
701
836
}
@@ -727,12 +862,7 @@ namespace {
727
862
template <typename S> \
728
863
DEF_SEM (LD1_TRIPLE_POSTINDEX_ ## esize, V128W dst1, V128W dst2, \
729
864
V128W dst3, S src, R64W addr_reg, ADDR next_addr) { \
730
- auto elems1 = UReadV ## esize (src); \
731
- auto elems2 = UReadV ## esize (GetElementPtr (src, 1U )); \
732
- auto elems3 = UReadV ## esize (GetElementPtr (src, 2U )); \
733
- UWriteV ## esize (dst1, elems1); \
734
- UWriteV ## esize (dst2, elems2); \
735
- UWriteV ## esize (dst3, elems3); \
865
+ memory = LD1_TRIPLE_ ## esize (memory, state, dst1, dst2, dst3, src); \
736
866
Write (addr_reg, Read (next_addr)); \
737
867
return memory; \
738
868
}
@@ -764,14 +894,7 @@ namespace {
764
894
template <typename S> \
765
895
DEF_SEM (LD1_QUAD_POSTINDEX_ ## esize, V128W dst1, V128W dst2, \
766
896
V128W dst3, V128W dst4, S src, R64W addr_reg, ADDR next_addr) { \
767
- auto elems1 = UReadV ## esize (src); \
768
- auto elems2 = UReadV ## esize (GetElementPtr (src, 1U )); \
769
- auto elems3 = UReadV ## esize (GetElementPtr (src, 2U )); \
770
- auto elems4 = UReadV ## esize (GetElementPtr (src, 3U )); \
771
- UWriteV ## esize (dst1, elems1); \
772
- UWriteV ## esize (dst2, elems2); \
773
- UWriteV ## esize (dst3, elems3); \
774
- UWriteV ## esize (dst4, elems4); \
897
+ memory = LD1_QUAD_ ## esize (memory, state, dst1, dst2, dst3, dst4, src); \
775
898
Write (addr_reg, Read (next_addr)); \
776
899
return memory; \
777
900
}
@@ -799,6 +922,73 @@ DEF_ISEL(LD1_ASISDLSEP_I4_I4_2D) = LD1_QUAD_POSTINDEX_64<MV128>;
799
922
800
923
namespace {
801
924
925
+ #define MAKE_LD2 (size ) \
926
+ template <typename S> \
927
+ DEF_SEM (LD2_ ## size, V128W dst1, V128W dst2, S src) { \
928
+ auto vec = UReadV ## size (src); \
929
+ auto dst1_vec = UClearV ## size (UReadV ## size (dst1)); \
930
+ auto dst2_vec = UClearV ## size (UReadV ## size (dst2)); \
931
+ _Pragma (" unroll" ) \
932
+ for (size_t i = 0 , j = 0 ; i < NumVectorElems (vec); j++) { \
933
+ dst1_vec = UInsertV ## size (dst1_vec, j, UExtractV ## size (vec, i++)); \
934
+ dst2_vec = UInsertV ## size (dst2_vec, j, UExtractV ## size (vec, i++)); \
935
+ } \
936
+ UWriteV ## size (dst1, dst1_vec); \
937
+ UWriteV ## size (dst2, dst2_vec); \
938
+ return memory; \
939
+ }
940
+
941
+ MAKE_LD2 (8 )
942
+ MAKE_LD2 (16 )
943
+ MAKE_LD2 (32 )
944
+ MAKE_LD2 (64 )
945
+
946
+ #undef MAKE_LD2
947
+
948
+ #define MAKE_LD2 (size ) \
949
+ template <typename S> \
950
+ DEF_SEM (LD2_ ## size ## _POSTINDEX, V128W dst1, V128W dst2, S src, \
951
+ R64W addr_reg, ADDR next_addr) { \
952
+ memory = LD2_ ## size (memory, state, dst1, dst2, src); \
953
+ Write (addr_reg, Read (next_addr)); \
954
+ return memory; \
955
+ }
956
+
957
+ MAKE_LD2 (8 )
958
+ MAKE_LD2 (16 )
959
+ MAKE_LD2 (32 )
960
+ MAKE_LD2 (64 )
961
+
962
+ #undef MAKE_LD2
963
+
964
+ } // namespace
965
+
966
// Instruction-selection bindings for LD2. The LD2_ASISDLSE_R2_* names use
// LD2_<lane bits>; both post-index families (LD2_ASISDLSEP_I2_I_* and
// LD2_ASISDLSEP_R2_R_*) share LD2_<lane bits>_POSTINDEX. The 8B/4H/2S
// arrangements are instantiated with MV128 and the 16B/8H/4S/2D arrangements
// with MV256. No 1D arrangement is bound in this list.
+ DEF_ISEL (LD2_ASISDLSE_R2_8B) = LD2_8<MV128>;
967
+ DEF_ISEL (LD2_ASISDLSE_R2_16B) = LD2_8<MV256>;
968
+ DEF_ISEL (LD2_ASISDLSE_R2_4H) = LD2_16<MV128>;
969
+ DEF_ISEL (LD2_ASISDLSE_R2_8H) = LD2_16<MV256>;
970
+ DEF_ISEL (LD2_ASISDLSE_R2_2S) = LD2_32<MV128>;
971
+ DEF_ISEL (LD2_ASISDLSE_R2_4S) = LD2_32<MV256>;
972
+ DEF_ISEL (LD2_ASISDLSE_R2_2D) = LD2_64<MV256>;
973
+
974
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_8B) = LD2_8_POSTINDEX<MV128>;
975
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_16B) = LD2_8_POSTINDEX<MV256>;
976
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_4H) = LD2_16_POSTINDEX<MV128>;
977
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_8H) = LD2_16_POSTINDEX<MV256>;
978
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_2S) = LD2_32_POSTINDEX<MV128>;
979
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_4S) = LD2_32_POSTINDEX<MV256>;
980
+ DEF_ISEL (LD2_ASISDLSEP_I2_I_2D) = LD2_64_POSTINDEX<MV256>;
981
+
982
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_8B) = LD2_8_POSTINDEX<MV128>;
983
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_16B) = LD2_8_POSTINDEX<MV256>;
984
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_4H) = LD2_16_POSTINDEX<MV128>;
985
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_8H) = LD2_16_POSTINDEX<MV256>;
986
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_2S) = LD2_32_POSTINDEX<MV128>;
987
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_4S) = LD2_32_POSTINDEX<MV256>;
988
+ DEF_ISEL (LD2_ASISDLSEP_R2_R_2D) = LD2_64_POSTINDEX<MV256>;
989
+
990
+ namespace {
991
+
802
992
#define EXTRACT_VEC (prefix, size, ext_op ) \
803
993
template <typename D, typename T> \
804
994
DEF_SEM (prefix ## MovFromVec ## size, D dst, V128 src, I64 index) { \
0 commit comments