@@ -798,42 +798,60 @@ definitions:
798798 implementation : " return ~0;"
799799 # INTEL - AVX2
# AVX2 entry for the 8-, 16- and 32-bit element types; the added ctype line no longer
# lists uint64_t, int64_t or double. The added implementation returns ~0 converted to
# Vec::imask_type, i.e. a value with every bit of the mask type set. The removed lines
# returned (1 << vector_element_count()) - 1 when the vector had fewer than 8 elements.
# NOTE(review): presumably imask_type has exactly one bit per lane for these types, so
# "all bits set" equals "all lanes set" - confirm against the Vec definitions.
800800 - target_extension : " avx2"
801- ctype : [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", " int8_t", "int16_t", "int32_t", "int64_t", " float", "double" ]
801+ ctype : [ "uint8_t", "uint16_t", "uint32_t", "int8_t", "int16_t", "int32_t", "float"]
802802 lscpu_flags : [ "avx2" ]
803803 includes : ["<type_traits>"]
804804 implementation : |
805- if constexpr(Vec::vector_element_count() < 8) {
806- return ((static_cast<typename Vec::imask_type>(1)<<Vec::vector_element_count()) - 1);
807- } else {
808- return ~0;
809- }
805+ return static_cast<typename Vec::imask_type>(~0);
# AVX2 entry for uint64_t, int64_t and double. Returns the literal 0b1111 (the four
# low bits set) converted to Vec::imask_type rather than an all-ones value.
# NOTE(review): presumably one mask bit per lane with four 64-bit lanes per vector - confirm.
806+ - target_extension : " avx2"
807+ ctype : [ "uint64_t", "int64_t", "double" ]
808+ lscpu_flags : [ "avx2" ]
809+ includes : ["<type_traits>"]
810+ implementation : |
811+ return static_cast<typename Vec::imask_type>(0b1111);
810812 # INTEL - SSE
# SSE entry for the 8- and 16-bit integer types; the added ctype line no longer lists
# the 32-bit and 64-bit types, float or double. The added implementation returns ~0
# converted to Vec::imask_type (every bit of the mask type set). The removed lines
# returned (1 << vector_element_count()) - 1 when the vector had fewer than 8 elements.
# NOTE(review): presumably imask_type has exactly one bit per lane for these types - confirm.
811813 - target_extension : " sse"
812- ctype : [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", " int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ]
814+ ctype : [ "uint8_t", "uint16_t", "int8_t", "int16_t"]
813815 lscpu_flags : [ "sse" ]
814816 includes : ["<type_traits>"]
815817 implementation : |
816- if constexpr(Vec::vector_element_count() < 8) {
817- return ((static_cast<typename Vec::imask_type>(1)<<Vec::vector_element_count()) - 1);
818- } else {
819- return ~0;
820- }
818+ return static_cast<typename Vec::imask_type>(~0);
# SSE entry for uint32_t, int32_t and float. Returns the literal 0b1111 (the four low
# bits set) converted to Vec::imask_type.
# NOTE(review): presumably one mask bit per lane with four 32-bit lanes per vector - confirm.
819+ - target_extension : " sse"
820+ ctype : [ "uint32_t", "int32_t", "float" ]
821+ lscpu_flags : [ "sse" ]
822+ includes : ["<type_traits>"]
823+ implementation : |
824+ return static_cast<typename Vec::imask_type>(0b1111);
# SSE entry for uint64_t, int64_t and double. Returns the literal 0b11 (the two low
# bits set) converted to Vec::imask_type.
# NOTE(review): presumably one mask bit per lane with two 64-bit lanes per vector - confirm.
825+ - target_extension : " sse"
826+ ctype : [ "uint64_t", "int64_t", "double" ]
827+ lscpu_flags : [ "sse" ]
828+ includes : ["<type_traits>"]
829+ implementation : |
830+ return static_cast<typename Vec::imask_type>(0b11);
821831 # SCALAR
# Scalar entry covering all ten listed element types. It lists no lscpu flags and its
# implementation is the single statement "return true;". Unchanged by this diff.
822832 - target_extension : " scalar"
823833 ctype : [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ]
824834 lscpu_flags : []
825835 implementation : return true;
826836 # ARM - NEON
# NEON entry for the 8- and 16-bit integer types; the added ctype line no longer lists
# the 32-bit and 64-bit types, float or double. The added implementation returns ~0
# converted to Vec::imask_type (every bit of the mask type set). The removed lines
# returned (1 << vector_element_count()) - 1 when the vector had fewer than 8 elements.
# NOTE(review): presumably imask_type has exactly one bit per lane for these types - confirm.
827837 - target_extension : " neon"
828- ctype : [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", " int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ]
838+ ctype : [ "uint8_t", "uint16_t", "int8_t", "int16_t"]
829839 lscpu_flags : [ "neon" ]
830840 includes : ["<type_traits>"]
831841 implementation : |
832- if constexpr(Vec::vector_element_count() < 8) {
833- return ((static_cast<typename Vec::imask_type>(1)<<Vec::vector_element_count()) - 1);
834- } else {
835- return ~0;
836- }
842+ return static_cast<typename Vec::imask_type>(~0);
# NEON entry for uint32_t, int32_t and float. Returns the literal 0b1111 (the four low
# bits set) converted to Vec::imask_type.
# NOTE(review): presumably one mask bit per lane with four 32-bit lanes per vector - confirm.
843+ - target_extension : " neon"
844+ ctype : [ "uint32_t", "int32_t", "float" ]
845+ lscpu_flags : [ "neon" ]
846+ includes : ["<type_traits>"]
847+ implementation : |
848+ return static_cast<typename Vec::imask_type>(0b1111);
# NEON entry for uint64_t, int64_t and double. Returns the literal 0b11 (the two low
# bits set) converted to Vec::imask_type.
# NOTE(review): presumably one mask bit per lane with two 64-bit lanes per vector - confirm.
849+ - target_extension : " neon"
850+ ctype : [ "uint64_t", "int64_t", "double" ]
851+ lscpu_flags : [ "neon" ]
852+ includes : ["<type_traits>"]
853+ implementation : |
854+ return static_cast<typename Vec::imask_type>(0b11);
837855 ...
838856---
839857primitive_name : " integral_all_false"
@@ -957,19 +975,19 @@ definitions:
# AVX-512 entry for all ten element types. The added implementation shifts `mask` right
# by `position` and ANDs the result with ~0 converted to Vec::imask_type. The removed
# implementation ANDed with (1ULL << vector_element_count()) - 1 instead.
# NOTE(review): the removed form shifts 1ULL by the element count, which is undefined
# in C++ if a vector ever has 64 elements - presumably the motivation; confirm.
# NOTE(review): the added form no longer clears bits at or above vector_element_count()
# when imask_type is wider than the lane count - confirm callers never rely on that.
957975 - target_extension : " avx512"
958976 ctype : ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"]
959977 lscpu_flags : ["avx512f"]
960- implementation : " return (mask >> position) & ((1ULL << Vec::vector_element_count()) - 1 );"
978+ implementation : " return (mask >> position) & (static_cast<typename Vec::imask_type>(~0) );"
# AVX2 entry for all ten element types. The added implementation shifts `mask` right by
# `position` and ANDs the result with ~0 converted to Vec::imask_type. The removed
# implementation ANDed with (1ULL << vector_element_count()) - 1 instead.
# NOTE(review): the added form no longer clears bits at or above vector_element_count()
# when imask_type is wider than the lane count - confirm callers never rely on that.
# NOTE(review): lscpu_flags lists "avx" although target_extension is avx2 - confirm intended.
961979 - target_extension : " avx2"
962980 ctype : ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"]
963981 lscpu_flags : ["avx"]
964- implementation : " return (mask >> position) & ((1ULL << Vec::vector_element_count()) - 1 );"
982+ implementation : " return (mask >> position) & (static_cast<typename Vec::imask_type>(~0) );"
# SSE entry for all ten element types. The added implementation shifts `mask` right by
# `position` and ANDs the result with ~0 converted to Vec::imask_type. The removed
# implementation ANDed with (1ULL << vector_element_count()) - 1 instead.
# NOTE(review): the added form no longer clears bits at or above vector_element_count()
# when imask_type is wider than the lane count - confirm callers never rely on that.
965983 - target_extension : " sse"
966984 ctype : ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"]
967985 lscpu_flags : ["sse"]
968- implementation : " return (mask >> position) & ((1ULL << Vec::vector_element_count()) - 1 );"
986+ implementation : " return (mask >> position) & (static_cast<typename Vec::imask_type>(~0) );"
# Scalar entry for all ten element types; lists no lscpu flags. The added implementation
# shifts `mask` right by `position` and ANDs the result with ~0 converted to
# Vec::imask_type. The removed implementation ANDed with
# (1ULL << vector_element_count()) - 1 instead.
# NOTE(review): the added form no longer clears bits at or above vector_element_count()
# when imask_type is wider than the lane count - confirm callers never rely on that.
969987 - target_extension : " scalar"
970988 ctype : ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"]
971989 lscpu_flags : []
972- implementation : " return (mask >> position) & ((1ULL << Vec::vector_element_count()) - 1 );"
990+ implementation : " return (mask >> position) & (static_cast<typename Vec::imask_type>(~0) );"
973991...
974992# ---
975993# primitive_name: "mask_reduce"
0 commit comments