Commit 9a7e9bc
Copybara import of the project:
--
9e3f85a by sunyuechi <[email protected]>:

Add RVV f16-f32-vcvt kernels and configs.

FUTURE_COPYBARA_INTEGRATE_REVIEW=#7641 from hleft:master 9e3f85a
PiperOrigin-RevId: 725406354
sunyuechi authored and xnnpack-bot committed Feb 11, 2025
1 parent e55b399 commit 9a7e9bc
Showing 8 changed files with 175 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cmake/gen/rvvfp16arith_microkernels.cmake
@@ -12,6 +12,9 @@
SET(PROD_RVVFP16ARITH_MICROKERNEL_SRCS)

SET(NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u1v.c
src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u2v.c
src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u4v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c
3 changes: 3 additions & 0 deletions gen/rvvfp16arith_microkernels.bzl
@@ -9,6 +9,9 @@ PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
]

NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
"src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u1v.c",
"src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u2v.c",
"src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u4v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c",
5 changes: 5 additions & 0 deletions scripts/generate-f16-f32-vcvt.sh
@@ -18,6 +18,11 @@ tools/xngen src/f16-f32-vcvt/neon-int32.c.in -D BATCH_TILE=32 -o src/f16-f32-vcv
tools/xngen src/f16-f32-vcvt/neonfp16.c.in -D BATCH_TILE=8 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-neonfp16-u8.c &
tools/xngen src/f16-f32-vcvt/neonfp16.c.in -D BATCH_TILE=16 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-neonfp16-u16.c &

################################ RISC-V Vector ################################
tools/xngen src/f16-f32-vcvt/rvvfp16arith.c.in -D LMUL=1 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u1v.c &
tools/xngen src/f16-f32-vcvt/rvvfp16arith.c.in -D LMUL=2 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u2v.c &
tools/xngen src/f16-f32-vcvt/rvvfp16arith.c.in -D LMUL=4 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u4v.c &

################################# x86 128-bit #################################
tools/xngen src/f16-f32-vcvt/sse-int16.c.in -D SSE=2 -D AVX=0 -D BATCH_TILE=8 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-sse2-int16-u8.c &
tools/xngen src/f16-f32-vcvt/sse-int16.c.in -D SSE=2 -D AVX=0 -D BATCH_TILE=16 -o src/f16-f32-vcvt/gen/f16-f32-vcvt-sse2-int16-u16.c &
6 changes: 6 additions & 0 deletions src/f16-f32-vcvt/f16-f32-vcvt.h
@@ -85,6 +85,12 @@ XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f16_f32_vcvt_ukernel__wasmrelaxedsimd_int32_u
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f16_f32_vcvt_ukernel__wasmrelaxedsimd_int32_u32, 32, false, xnn_float16, float, void, NULL)
#endif // XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
XNN_CVT_UKERNEL_WITH_PARAMS(xnn_arch_riscv_vector_fp16_arith, xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u1v, 1, true, xnn_float16, float, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(xnn_arch_riscv_vector_fp16_arith, xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u2v, 2, true, xnn_float16, float, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(xnn_arch_riscv_vector_fp16_arith, xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u4v, 4, true, xnn_float16, float, void, NULL)
#endif

XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f16_f32_vcvt_ukernel__scalar_u1, 1, false, xnn_float16, float, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f16_f32_vcvt_ukernel__scalar_u2, 2, false, xnn_float16, float, void, NULL)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_f16_f32_vcvt_ukernel__scalar_u3, 3, false, xnn_float16, float, void, NULL)
40 changes: 40 additions & 0 deletions src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u1v.c
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f16-f32-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


void xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u1v(
    size_t batch,
    const xnn_float16* input,
    float* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(xnn_float16) == 0);
  assert(input != NULL);
  assert(output != NULL);

  batch >>= XNN_LOG2_SIZEOF_HALF;

  const _Float16* i = (const _Float16*) input;
  for (; batch > 0;) {
    const int32_t n = __riscv_vsetvl_e16m1(batch); batch -= n;

    vfloat16m1_t x_f16v = __riscv_vle16_v_f16m1(i, n); i += n;

    vfloat32m2_t y_f32v = __riscv_vfwcvt_f_f_v_f32m2(x_f16v, n);

    __riscv_vse32_v_f32m2(output, y_f32v, n); output += n;
  }
}
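
For reference (not part of the commit), a minimal sketch of how this microkernel could be called directly. It assumes a RISC-V toolchain with the Zvfh extension, so that _Float16 is available, and that "xnnpack/vcvt.h" declares the ukernel as in the header change above; the input values and the standalone main are purely illustrative. Note that batch is a byte count, as the kernel's assertions require.

// Illustrative driver for the u1v kernel on 8 fp16 elements (sketch only).
#include <stdio.h>

#include "xnnpack/vcvt.h"

int main(void) {
  _Float16 input[8] = {0.5, 1.0, 1.5, 2.0, -1.0, 0.0, 3.25, -0.125};
  float output[8];

  // batch is passed in bytes; the kernel shifts it down to an element count internally.
  xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u1v(
      sizeof(input), (const xnn_float16*) input, output, /*params=*/NULL);

  for (int k = 0; k < 8; k++) {
    printf("%.3f\n", output[k]);
  }
  return 0;
}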
40 changes: 40 additions & 0 deletions src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u2v.c
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f16-f32-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


void xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u2v(
    size_t batch,
    const xnn_float16* input,
    float* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(xnn_float16) == 0);
  assert(input != NULL);
  assert(output != NULL);

  batch >>= XNN_LOG2_SIZEOF_HALF;

  const _Float16* i = (const _Float16*) input;
  for (; batch > 0;) {
    const int32_t n = __riscv_vsetvl_e16m2(batch); batch -= n;

    vfloat16m2_t x_f16v = __riscv_vle16_v_f16m2(i, n); i += n;

    vfloat32m4_t y_f32v = __riscv_vfwcvt_f_f_v_f32m4(x_f16v, n);

    __riscv_vse32_v_f32m4(output, y_f32v, n); output += n;
  }
}
40 changes: 40 additions & 0 deletions src/f16-f32-vcvt/gen/f16-f32-vcvt-rvvfp16arith-u4v.c
@@ -0,0 +1,40 @@
// Auto-generated file. Do not edit!
// Template: src/f16-f32-vcvt/rvvfp16arith.c.in
// Generator: tools/xngen
//
// Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


void xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u4v(
    size_t batch,
    const xnn_float16* input,
    float* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(xnn_float16) == 0);
  assert(input != NULL);
  assert(output != NULL);

  batch >>= XNN_LOG2_SIZEOF_HALF;

  const _Float16* i = (const _Float16*) input;
  for (; batch > 0;) {
    const int32_t n = __riscv_vsetvl_e16m4(batch); batch -= n;

    vfloat16m4_t x_f16v = __riscv_vle16_v_f16m4(i, n); i += n;

    vfloat32m8_t y_f32v = __riscv_vfwcvt_f_f_v_f32m8(x_f16v, n);

    __riscv_vse32_v_f32m8(output, y_f32v, n); output += n;
  }
}
38 changes: 38 additions & 0 deletions src/f16-f32-vcvt/rvvfp16arith.c.in
@@ -0,0 +1,38 @@
// Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert LMUL in [1, 2, 4]
$LMUL_32 = {1: "2", 2: "4", 4: "8"}[LMUL]
#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/vcvt.h"


void xnn_f16_f32_vcvt_ukernel__rvvfp16arith_u${LMUL}v(
    size_t batch,
    const xnn_float16* input,
    float* output,
    const void* params)
{
  assert(batch != 0);
  assert(batch % sizeof(xnn_float16) == 0);
  assert(input != NULL);
  assert(output != NULL);

  batch >>= XNN_LOG2_SIZEOF_HALF;

  const _Float16* i = (const _Float16*) input;
  for (; batch > 0;) {
    const int32_t n = __riscv_vsetvl_e16m${LMUL}(batch); batch -= n;

    vfloat16m${LMUL}_t x_f16v = __riscv_vle16_v_f16m${LMUL}(i, n); i += n;

    vfloat32m${LMUL_32}_t y_f32v = __riscv_vfwcvt_f_f_v_f32m${LMUL_32}(x_f16v, n);

    __riscv_vse32_v_f32m${LMUL_32}(output, y_f32v, n); output += n;
  }
}
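
A note on the template, for context rather than as part of the change: the LMUL assertion ($assert LMUL in [1, 2, 4]) reflects the widening convert, which produces 32-bit results at twice the input register-group size (e16 with LMUL L yields e32 with LMUL 2L), so LMUL=4 is the largest fp16 grouping whose fp32 counterpart (m8) still exists. Element-wise, the generated kernels are equivalent to the following scalar reference, shown only as an illustrative sketch:

// Scalar reference for the conversion the RVV kernels perform (illustrative only).
static void f16_to_f32_reference(size_t n, const _Float16* x, float* y) {
  for (size_t k = 0; k < n; k++) {
    y[k] = (float) x[k];  // widen each half-precision element to single precision
  }
}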
