Skip to content

Commit 906fef5

Browse files
committed
b
1 parent 33eaa94 commit 906fef5

2 files changed

Lines changed: 31 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ jobs:
114114
runs-on: ubuntu-latest
115115
strategy:
116116
matrix:
117+
fail-fast: false
117118
include:
118119
- name: milkv-duo
119120
single-core: true

highgui/src/stb_image.h

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3224,6 +3224,35 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
32243224
vint16m1_t out3 = __riscv_vnsra_wx_i16m1(__riscv_vadd_vv_i32m2(x3, x4, vl), 17, vl);
32253225
vint16m1_t out4 = __riscv_vnsra_wx_i16m1(__riscv_vsub_vv_i32m2(x3, x4, vl), 17, vl);
32263226

3227+
#ifdef __THEAD_VERSION__
3228+
// clamp 0~255
3229+
out0 = __riscv_vmax_vx_i16m1(out0, 0, vl);
3230+
out7 = __riscv_vmax_vx_i16m1(out7, 0, vl);
3231+
out1 = __riscv_vmax_vx_i16m1(out1, 0, vl);
3232+
out6 = __riscv_vmax_vx_i16m1(out6, 0, vl);
3233+
out2 = __riscv_vmax_vx_i16m1(out2, 0, vl);
3234+
out5 = __riscv_vmax_vx_i16m1(out5, 0, vl);
3235+
out3 = __riscv_vmax_vx_i16m1(out3, 0, vl);
3236+
out4 = __riscv_vmax_vx_i16m1(out4, 0, vl);
3237+
vuint8m1_t out0u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out0, out0)), 0, __RISCV_VXRM_RNU, vl);
3238+
vuint8m1_t out7u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out7, out7)), 0, __RISCV_VXRM_RNU, vl);
3239+
vuint8m1_t out1u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out1, out1)), 0, __RISCV_VXRM_RNU, vl);
3240+
vuint8m1_t out6u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out6, out6)), 0, __RISCV_VXRM_RNU, vl);
3241+
vuint8m1_t out2u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out2, out2)), 0, __RISCV_VXRM_RNU, vl);
3242+
vuint8m1_t out5u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out5, out5)), 0, __RISCV_VXRM_RNU, vl);
3243+
vuint8m1_t out3u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out3, out3)), 0, __RISCV_VXRM_RNU, vl);
3244+
vuint8m1_t out4u8 = __riscv_vnclipu_wx_u8m1(__riscv_vreinterpret_v_i16m2_u16m2(__riscv_vcreate_v_i16m1_i16m2(out4, out4)), 0, __RISCV_VXRM_RNU, vl);
3245+
3246+
// 8x8 transpose
3247+
__riscv_vsse8_v_u8m1(out + 0, out_stride, out0u8, vl);
3248+
__riscv_vsse8_v_u8m1(out + 1, out_stride, out1u8, vl);
3249+
__riscv_vsse8_v_u8m1(out + 2, out_stride, out2u8, vl);
3250+
__riscv_vsse8_v_u8m1(out + 3, out_stride, out3u8, vl);
3251+
__riscv_vsse8_v_u8m1(out + 4, out_stride, out4u8, vl);
3252+
__riscv_vsse8_v_u8m1(out + 5, out_stride, out5u8, vl);
3253+
__riscv_vsse8_v_u8m1(out + 6, out_stride, out6u8, vl);
3254+
__riscv_vsse8_v_u8m1(out + 7, out_stride, out7u8, vl);
3255+
#else // __THEAD_VERSION__
32273256
// clamp 0~255
32283257
vuint8mf2_t out0u8 = __riscv_vnclipu_wx_u8mf2(__riscv_vreinterpret_v_i16m1_u16m1(__riscv_vmax_vx_i16m1(out0, 0, vl)), 0, __RISCV_VXRM_RNU, vl);
32293258
vuint8mf2_t out7u8 = __riscv_vnclipu_wx_u8mf2(__riscv_vreinterpret_v_i16m1_u16m1(__riscv_vmax_vx_i16m1(out7, 0, vl)), 0, __RISCV_VXRM_RNU, vl);
@@ -3243,6 +3272,7 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
32433272
__riscv_vsse8_v_u8mf2(out + 5, out_stride, out5u8, vl);
32443273
__riscv_vsse8_v_u8mf2(out + 6, out_stride, out6u8, vl);
32453274
__riscv_vsse8_v_u8mf2(out + 7, out_stride, out7u8, vl);
3275+
#endif // __THEAD_VERSION__
32463276
}
32473277
#endif
32483278
}

0 commit comments

Comments
 (0)