Skip to content

Commit 86767b0

Browse files
authored
support to export gguf q4_0 and q4_1 format (#393)
1 parent ba2426c commit 86767b0

File tree

12 files changed

+4827
-28
lines changed

12 files changed

+4827
-28
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
endianess

auto_round/__main__.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@
1414
import sys
1515

1616
def run_eval():
    """Run the evaluation entry point selected by the command line.

    If ``--native`` is absent from ``sys.argv``, ``eval_sequence`` is called.
    Otherwise the first ``--native`` occurrence is removed from ``sys.argv``
    and ``eval`` is called. In both cases the callee receives the value
    returned by ``setup_eval_parser()``.
    """
    if "--native" not in sys.argv:
        from auto_round.script.llm import setup_eval_parser, eval_sequence

        eval_sequence(setup_eval_parser())
        return

    # Drop the flag before the parser is built, as the parser is only
    # invoked after the removal.
    sys.argv.remove("--native")
    # Alias the imported function so the builtin ``eval`` is not shadowed.
    from auto_round.script.llm import setup_eval_parser, eval as native_eval

    native_eval(setup_eval_parser())
2026

2127
def run():
2228
if "--eval" in sys.argv:

auto_round/autoround.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,6 +1267,14 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k
12671267
if processor is not None:
12681268
processor.save_pretrained(output_dir)
12691269
return
1270+
if format in ["gguf:q4_0", "gguf:q4_1"]:
1271+
if self.group_size != 32:
1272+
logger.error(f"{format} need group_size=32, but it is {self.group_size}, cannot export.")
1273+
return
1274+
if format == "gguf:q4_0" and not self.sym:
1275+
logger.warning(f"incorrect format choose, will reset to gguf:q4_1")
1276+
if format == "gguf:q4_1" and self.sym:
1277+
logger.warning(f"incorrect format choose, will reset to gguf:q4_0")
12701278

12711279
from auto_round.export import EXPORT_FORMAT
12721280
backend = format

auto_round/export/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,8 @@ def _save_quantized_as_autoawq(*args, **kwargs):
4848
from auto_round.export.export_to_awq.export import save_quantized_as_autoawq
4949

5050
return save_quantized_as_autoawq(*args, **kwargs)
51+
52+
@register_format("gguf")
def _save_quantized_as_gguf(*args, **kwargs):
    """Forward an export request to the GGUF exporter.

    The exporter module is imported at call time rather than at module
    import. All positional and keyword arguments are passed through
    unchanged, and the exporter's return value is returned as-is.
    """
    # Named after its own format: the previous name duplicated the AWQ
    # wrapper's and rebound that module-level name to this function.
    from auto_round.export.export_to_gguf.export import save_quantized_as_gguf

    return save_quantized_as_gguf(*args, **kwargs)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

0 commit comments

Comments
 (0)