Copybara import of the project:

dsharlet · xnnpack-bot · commit 6790eb91c775 · 2025-02-18T23:16:19.000-08:00
-- 17f6dab by Dillon <dsharlet@gmail.com>: Add .vs to .gitignore
-- 951240c by Dillon <dsharlet@gmail.com>: Fix warnings on MSVC
-- 585faa1 by Dillon <dsharlet@gmail.com>: Disable warning on MSVC
-- 92acc17 by Dillon <dsharlet@gmail.com>: Disable AVX256 when /vlen not supported by MSVC
FUTURE_COPYBARA_INTEGRATE_REVIEW=#7840 from google:ds/windows 92acc17
PiperOrigin-RevId: 728495899
diff --git a/.gitignore b/.gitignore
@@ -34,3 +34,4 @@ ehthumbs.db
 Thumbs.db
 *.swp
 .vscode
+.vs
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -277,6 +277,12 @@ ELSEIF(CMAKE_C_COMPILER_ID STREQUAL "Clang")
   ENDIF()
 ELSEIF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
   SET(XNNPACK_ENABLE_AVX512BF16 OFF)
+  IF(MSVC_VERSION LESS_EQUAL 1941)
+    # /vlen option not supported
+    SET(XNNPACK_ENABLE_AVX256SKX OFF)
+    SET(XNNPACK_ENABLE_AVX256VNNI OFF)
+    SET(XNNPACK_ENABLE_AVX256VNNIGFNI OFF)
+  ENDIF()
 ENDIF()
 OPTION(XNNPACK_ENABLE_HVX "Build XNNPACK with Hexagon HVX micro-kernels" ON)
 OPTION(XNNPACK_ENABLE_KLEIDIAI "Use KleidiAI GEMM microkernels for Arm" ON)
@@ -341,6 +347,8 @@ IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
   # Test files have many sections, increase the limit. See
   # https://learn.microsoft.com/en-us/cpp/build/reference/bigobj-increase-number-of-sections-in-dot-obj-file.
   ADD_COMPILE_OPTIONS("/bigobj")
+  # Our float16 datatypes have constructors in C++ and not in C, which prompts a warning in MSVC
+  ADD_COMPILE_OPTIONS("/wd4190")
 ENDIF()
 
 IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
diff --git a/src/configs/gemm-config.c b/src/configs/gemm-config.c
@@ -2799,7 +2799,11 @@ static void init_qd8_f16_qc8w_gemm_config(void) {
         qd8_f16_qc8w_gemm_config.pack_weights_and_biases = NULL;  // Override the default packing function.
         qd8_f16_qc8w_gemm_config.packed_stride_weights_and_biases = NULL;  // Override the default packing function.
         qd8_f16_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w;
+        #if XNN_ENABLE_AVX256VNNI
         qd8_f16_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_qs8_packw_gemm_goi_ukernel_x64c4__avx256vnni_prfm;
+        #else
+        qd8_f16_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w;
+        #endif
         qd8_f16_qc8w_gemm_config.mr = 16;
         qd8_f16_qc8w_gemm_config.nr = 64;
         qd8_f16_qc8w_gemm_config.log2_kr = 2;
@@ -3389,7 +3393,11 @@ static void init_qd8_f32_qc8w_gemm_config(void) {
         qd8_f32_qc8w_gemm_config.pack_weights_and_biases = NULL;  // Override the default packing function.
         qd8_f32_qc8w_gemm_config.packed_stride_weights_and_biases = NULL;  // Override the default packing function.
         qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w;
+        #if XNN_ENABLE_AVX256VNNI
         qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_qs8_packw_gemm_goi_ukernel_x64c4__avx256vnni_prfm;
+        #else
+        qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w;
+        #endif
         qd8_f32_qc8w_gemm_config.mr = 16;
         qd8_f32_qc8w_gemm_config.nr = 64;
         qd8_f32_qc8w_gemm_config.log2_kr = 2;
@@ -4089,7 +4097,11 @@ static void init_qs8_qc8w_gemm_config(void) {
         qs8_qc8w_gemm_config.pack_weights_and_biases = NULL;  // Override the default packing function.
         qs8_qc8w_gemm_config.packed_stride_weights_and_biases = NULL;  // Override the default packing function.
         qs8_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w;
+        #if XNN_ENABLE_AVX256VNNI
         qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_qs8_packw_gemm_goi_ukernel_x64c4__avx256vnni_prfm;
+        #else
+        qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w;
+        #endif
         qs8_qc8w_gemm_config.pack_igemm_goki = (xnn_pack_conv_goki_w_fn) xnn_pack_qs8_conv_goki_w;
         qs8_qc8w_gemm_config.pack_igemm_kgo = (xnn_pack_conv_kgo_w_fn) xnn_pack_qs8_conv_kgo_w;
         qs8_qc8w_gemm_config.pack_deconv_goki = (xnn_pack_deconv_goki_w_fn) xnn_pack_qs8_deconv_goki_w;
diff --git a/src/operator-run.c b/src/operator-run.c
@@ -2237,14 +2237,14 @@ void xnn_compute_f16_qd8_convert(
     const struct f16_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],
     size_t batch_index)
 {
-  return xnn_compute_f16_qx8_convert(context, xnn_f16_qd8_asymmetric_quantization_params, batch_index);
+  xnn_compute_f16_qx8_convert(context, xnn_f16_qd8_asymmetric_quantization_params, batch_index);
 }
 
 void xnn_compute_f16_qdu8_convert(
     const struct f16_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],
     size_t batch_index)
 {
-  return xnn_compute_f16_qx8_convert(context, xnn_f16_qdu8_asymmetric_quantization_params, batch_index);
+  xnn_compute_f16_qx8_convert(context, xnn_f16_qdu8_asymmetric_quantization_params, batch_index);
 }
 
 void xnn_compute_f32_qx8_convert(
@@ -2273,14 +2273,14 @@ void xnn_compute_f32_qd8_convert(
     const struct f32_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],
     size_t batch_index)
 {
-  return xnn_compute_f32_qx8_convert(context, xnn_f32_qd8_asymmetric_quantization_params, batch_index);
+  xnn_compute_f32_qx8_convert(context, xnn_f32_qd8_asymmetric_quantization_params, batch_index);
 }
 
 void xnn_compute_f32_qdu8_convert(
     const struct f32_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],
     size_t batch_index)
 {
-  return xnn_compute_f32_qx8_convert(context, xnn_f32_qdu8_asymmetric_quantization_params, batch_index);
+  xnn_compute_f32_qx8_convert(context, xnn_f32_qdu8_asymmetric_quantization_params, batch_index);
 }
 
 void xnn_compute_pack_lh(

Original file line number	Diff line number	Diff line change
 Thumbs.db
 *.swp
 .vscode
 +.vs
Original file line number	Diff line number	Diff line change
`@@ -2237,14 +2237,14 @@ void xnn_compute_f16_qd8_convert(`
`2237`	`2237`	`const struct f16_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],`
`2238`	`2238`	`size_t batch_index)`
`2239`	`2239`	`{`
`2240`		`- return xnn_compute_f16_qx8_convert(context, xnn_f16_qd8_asymmetric_quantization_params, batch_index);`
	`2240`	`+ xnn_compute_f16_qx8_convert(context, xnn_f16_qd8_asymmetric_quantization_params, batch_index);`
`2241`	`2241`	`}`
`2242`	`2242`
`2243`	`2243`	`void xnn_compute_f16_qdu8_convert(`
`2244`	`2244`	`const struct f16_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],`
`2245`	`2245`	`size_t batch_index)`
`2246`	`2246`	`{`
`2247`		`- return xnn_compute_f16_qx8_convert(context, xnn_f16_qdu8_asymmetric_quantization_params, batch_index);`
	`2247`	`+ xnn_compute_f16_qx8_convert(context, xnn_f16_qdu8_asymmetric_quantization_params, batch_index);`
`2248`	`2248`	`}`
`2249`	`2249`
`2250`	`2250`	`void xnn_compute_f32_qx8_convert(`
`@@ -2273,14 +2273,14 @@ void xnn_compute_f32_qd8_convert(`
`2273`	`2273`	`const struct f32_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],`
`2274`	`2274`	`size_t batch_index)`
`2275`	`2275`	`{`
`2276`		`- return xnn_compute_f32_qx8_convert(context, xnn_f32_qd8_asymmetric_quantization_params, batch_index);`
	`2276`	`+ xnn_compute_f32_qx8_convert(context, xnn_f32_qd8_asymmetric_quantization_params, batch_index);`
`2277`	`2277`	`}`
`2278`	`2278`
`2279`	`2279`	`void xnn_compute_f32_qdu8_convert(`
`2280`	`2280`	`const struct f32_qd8_convert_context context[restrict XNN_MIN_ELEMENTS(1)],`
`2281`	`2281`	`size_t batch_index)`
`2282`	`2282`	`{`
`2283`		`- return xnn_compute_f32_qx8_convert(context, xnn_f32_qdu8_asymmetric_quantization_params, batch_index);`
	`2283`	`+ xnn_compute_f32_qx8_convert(context, xnn_f32_qdu8_asymmetric_quantization_params, batch_index);`
`2284`	`2284`	`}`
`2285`	`2285`
`2286`	`2286`	`void xnn_compute_pack_lh(`