Skip to content

Commit d8bbb07

Browse files
authored
Merge pull request #176 from stochasticai/toan/fix_int8
fix: int8 fine-tuning
2 parents a94a0e1 + 81d18ea commit d8bbb07

File tree

3 files changed

+16
-18
lines changed

3 files changed

+16
-18
lines changed

examples/gptj/gptj_lora_int8.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import gc
2+
13
from xturing.datasets.instruction_dataset import InstructionDataset
24
from xturing.models import BaseModel
35

@@ -10,6 +12,9 @@
1012
# Save the model
1113
model.save("./gptj_weights")
1214

15+
del model
16+
gc.collect()
17+
model = BaseModel.load("./gptj_weights")
1318
# Once the model has been finetuned, you can start doing inferences
1419
output = model.generate(texts=["Why LLM models are becoming so important?"])
1520
print("Generated output by the model: {}".format(output))

examples/llama/llama_lora_int8.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import gc
2+
13
from xturing.datasets.instruction_dataset import InstructionDataset
24
from xturing.models import BaseModel
35

@@ -11,6 +13,9 @@
1113
model.save("./llama_weights")
1214

1315
# Once the model has been finetuned, you can start doing inferences
16+
del model
17+
gc.collect()
18+
model = BaseModel.load("./llama_weights")
1419
output = model.generate(texts=["Why LLM models are becoming so important?"])
1520
print("Generated output by the model: {}".format(output))
1621

src/xturing/config/generation_config.yaml

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@ llama_lora:
2020
max_new_tokens: 256
2121
do_sample: false
2222

23-
# Contrastive search
23+
# Greedy search
2424
llama_lora_int8:
25-
penalty_alpha: 0.6
26-
top_k: 4
2725
max_new_tokens: 256
2826
do_sample: false
2927

@@ -48,10 +46,8 @@ gptj_lora:
4846
max_new_tokens: 256
4947
do_sample: false
5048

51-
# Contrastive search
49+
# Greedy search
5250
gptj_lora_int8:
53-
penalty_alpha: 0.6
54-
top_k: 4
5551
max_new_tokens: 256
5652
do_sample: false
5753

@@ -104,10 +100,8 @@ galactica_lora:
104100
max_new_tokens: 256
105101
do_sample: false
106102

107-
# Contrastive search
103+
# Greedy search
108104
galactica_lora_int8:
109-
penalty_alpha: 0.6
110-
top_k: 4
111105
max_new_tokens: 256
112106
do_sample: false
113107

@@ -125,10 +119,8 @@ opt_lora:
125119
max_new_tokens: 256
126120
do_sample: false
127121

128-
# Contrastive search
122+
# Greedy search
129123
opt_lora_int8:
130-
penalty_alpha: 0.6
131-
top_k: 4
132124
max_new_tokens: 256
133125
do_sample: false
134126

@@ -146,10 +138,8 @@ cerebras_lora:
146138
max_new_tokens: 256
147139
do_sample: false
148140

149-
# Contrastive search
141+
# Greedy search
150142
cerebras_lora_int8:
151-
penalty_alpha: 0.6
152-
top_k: 4
153143
max_new_tokens: 256
154144
do_sample: false
155145

@@ -167,9 +157,7 @@ bloom_lora:
167157
max_new_tokens: 256
168158
do_sample: false
169159

170-
# Contrastive search
160+
# Greedy search
171161
bloom_lora_int8:
172-
penalty_alpha: 0.6
173-
top_k: 4
174162
max_new_tokens: 256
175163
do_sample: false

0 commit comments

Comments (0)