Skip to content

Commit 584bb75

Browse files
authored
add index bitwidth=10 (#181)
add index bitwidth=10
1 parent 0f153b3 commit 584bb75

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

csrc/dequant.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ namespace vptq {
63 63
case 12: \
64 64
callDequantWithOutliers_dtype(12, BASEGROUP, OUT_OUF_INF, ResidualBits); \
65 65
break; \
66+
case 10: \
67+
callDequantWithOutliers_dtype(10, BASEGROUP, OUT_OUF_INF, ResidualBits); \
68+
break; \
66 69
case 8: \
67 70
callDequantWithOutliers_dtype(8, BASEGROUP, OUT_OUF_INF, ResidualBits); \
68 71
break; \
@@ -200,6 +203,7 @@ torch::Tensor launch_deqantize_outliers_cuda_packkernel(
200 203
switch (base_groupsize) {
201 204
CASE_DispatchDequantWithOutliers(16);
202 205
CASE_DispatchDequantWithOutliers(12);
206+
CASE_DispatchDequantWithOutliers(10);
203 207
CASE_DispatchDequantWithOutliers(8);
204 208
CASE_DispatchDequantWithOutliers(6);
205 209
CASE_DispatchDequantWithOutliers(4);

csrc/quant_gemv.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ namespace vptq {
67 67
case 12: \
68 68
CallWqA16kernel_dtype(out_buf, 12, BASEGROUP, Do_Reduce, ResidualBits); \
69 69
break; \
70+
case 10: \
71+
CallWqA16kernel_dtype(out_buf, 10, BASEGROUP, Do_Reduce, ResidualBits); \
72+
break; \
70 73
case 8: \
71 74
CallWqA16kernel_dtype(out_buf, 8, BASEGROUP, Do_Reduce, ResidualBits); \
72 75
break; \
@@ -209,6 +212,9 @@ torch::Tensor launch_gemv_outliers_cuda_packkernel(
209 212
case 12:
210 213
DispatchWqA16Kernel(tmp_output, 12, do_reduce);
211 214
break;
215+
case 10:
216+
DispatchWqA16Kernel(tmp_output, 10, do_reduce);
217+
break;
212 218
case 8:
213 219
DispatchWqA16Kernel(tmp_output, 8, do_reduce);
214 220
break;

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1 1
[project]
2 2
name = "vptq"
3-
version = "0.0.5"
3+
version = "0.0.5post1"
4 4
authors = [
5 5
{ name="Ying Cao", email="lcy.seso@gmail.com" },
6 6
{ name="Jicheng Wen", email="wejoincy@gmail.com" },

0 commit comments

Comments
 (0)