Skip to content

Commit a29f40d

Browse files
authored
Merge pull request #34 from mobiusml/perf
Improved Perf
2 parents edbd025 + 0ebca76 commit a29f40d

24 files changed

Lines changed: 1020 additions & 756 deletions

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ from gemlite import DType, GemLiteLinear
6666
#Reset the default cache to get the best perf but warm-up will be slow.
6767
#gemlite.reset_cache()
6868

69+
#Set autotune mode: fast: fast start-up (default), max: long start-up but best perf, default/False: no autotune
70+
#gemlite.set_autotune("fast")
71+
72+
#Enable kernel caching: makes some kernels faster, but might break with some torch.compile settings
73+
#gemlite.set_kernel_caching(True)
74+
6975
#Main constructor
7076
gemlite_linear = GemLiteLinear(
7177
W_nbits, #weight quantization bitwidth. supported: [8, 4, 2, 1]
@@ -77,7 +83,7 @@ gemlite_linear = GemLiteLinear(
7783
scaled_activations=False, #If the activations are scaled or not
7884
)
7985

80-
#Packing: we follow the same format as hqq (https://github.com/mobiusml/hqq/)
86+
#Packing: we follow the hqq format: (W_q - zeros) * scales ~ W (https://github.com/mobiusml/hqq/)
8187
gemlite_linear.pack(W_q, scales, zeros, bias)
8288

8389
#Forward

gemlite/__init__.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,27 @@
1-
__version__ = "0.4.6"
1+
__version__ = "0.4.7"
22
__author__ = 'Dr. Hicham Badri'
33
__credits__ = 'Mobius Labs GmbH'
44

5-
from .core import GemLiteLinearTriton, GemLiteLinear, DType, GEMLITE_ACC_DTYPE, set_autotune_setting, set_packing_bitwidth, set_acc_dtype
6-
from .helper import A16W8, A8W8_int8_dynamic, A8W8_fp8_dynamic, A16Wn, A8Wn_dynamic
5+
from .core import (
6+
GemLiteLinearTriton,
7+
GemLiteLinear,
8+
DType,
9+
GEMLITE_ACC_DTYPE,
10+
set_autotune_setting,
11+
set_packing_bitwidth,
12+
set_acc_dtype,
13+
set_autotune,
14+
set_kernel_caching,
15+
forward_functional,
16+
)
17+
from .helper import (
18+
A16W8,
19+
A8W8_int8_dynamic,
20+
A8W8_fp8_dynamic,
21+
A16Wn,
22+
A8Wn_dynamic,
23+
)
24+
725

826
load_config = GemLiteLinear.load_config
927
cache_config = GemLiteLinear.cache_config

gemlite/configs/4090.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/6000_ada.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/a100.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/a40.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/a6000.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/h100.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/l40.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

gemlite/configs/l40s.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)