Skip to content

Commit d8bbb07

Browse files
authored
Merge pull request #176 from stochasticai/toan/fix_int8
fix: int8 fine-tuning
2 parents a94a0e1 + 81d18ea commit d8bbb07

File tree

3 files changed

+16
-18
lines changed

3 files changed

+16
-18
lines changed

examples/gptj/gptj_lora_int8.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import gc
2+
13
from xturing.datasets.instruction_dataset import InstructionDataset
24
from xturing.models import BaseModel
35

@@ -10,6 +12,9 @@
1012
# Save the model
1113
model.save("./gptj_weights")
1214

15+
del model
16+
gc.collect()
17+
model = BaseModel.load("./gptj_weights")
1318
# Once the model has been finetuned, you can start doing inferences
1419
output = model.generate(texts=["Why LLM models are becoming so important?"])
1520
print("Generated output by the model: {}".format(output))

examples/llama/llama_lora_int8.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import gc
2+
13
from xturing.datasets.instruction_dataset import InstructionDataset
24
from xturing.models import BaseModel
35

@@ -11,6 +13,9 @@
1113
model.save("./llama_weights")
1214

1315
# Once the model has been finetuned, you can start doing inferences
16+
del model
17+
gc.collect()
18+
model = BaseModel.load("./llama_weights")
1419
output = model.generate(texts=["Why LLM models are becoming so important?"])
1520
print("Generated output by the model: {}".format(output))
1621

src/xturing/config/generation_config.yaml

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@ llama_lora:
2020
max_new_tokens: 256
2121
do_sample: false
2222

23-
# Contrastive search
23+
# Greedy search
2424
llama_lora_int8:
25-
penalty_alpha: 0.6
26-
top_k: 4
2725
max_new_tokens: 256
2826
do_sample: false
2927

@@ -48,10 +46,8 @@ gptj_lora:
4846
max_new_tokens: 256
4947
do_sample: false
5048

51-
# Contrastive search
49+
# Greedy search
5250
gptj_lora_int8:
53-
penalty_alpha: 0.6
54-
top_k: 4
5551
max_new_tokens: 256
5652
do_sample: false
5753

@@ -104,10 +100,8 @@ galactica_lora:
104100
max_new_tokens: 256
105101
do_sample: false
106102

107-
# Contrastive search
103+
# Greedy search
108104
galactica_lora_int8:
109-
penalty_alpha: 0.6
110-
top_k: 4
111105
max_new_tokens: 256
112106
do_sample: false
113107

@@ -125,10 +119,8 @@ opt_lora:
125119
max_new_tokens: 256
126120
do_sample: false
127121

128-
# Contrastive search
122+
# Greedy search
129123
opt_lora_int8:
130-
penalty_alpha: 0.6
131-
top_k: 4
132124
max_new_tokens: 256
133125
do_sample: false
134126

@@ -146,10 +138,8 @@ cerebras_lora:
146138
max_new_tokens: 256
147139
do_sample: false
148140

149-
# Contrastive search
141+
# Greedy search
150142
cerebras_lora_int8:
151-
penalty_alpha: 0.6
152-
top_k: 4
153143
max_new_tokens: 256
154144
do_sample: false
155145

@@ -167,9 +157,7 @@ bloom_lora:
167157
max_new_tokens: 256
168158
do_sample: false
169159

170-
# Contrastive search
160+
# Greedy search
171161
bloom_lora_int8:
172-
penalty_alpha: 0.6
173-
top_k: 4
174162
max_new_tokens: 256
175163
do_sample: false

0 commit comments

Comments (0)