@@ -23,17 +23,14 @@
import datasets
import numpy as np
import torch
-import transformers
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from torch import Tensor
from torch import nn
from torch.jit import TracerWarning
from torch.utils.tensorboard import SummaryWriter
-from transformers import AutoModelForCausalLM
-from transformers import AutoTokenizer
-from transformers import get_cosine_schedule_with_warmup

+import transformers
from examples.llm_compression.torch.qat_with_lora.main import load_checkpoint
from examples.llm_compression.torch.qat_with_lora.main import save_checkpoint
from examples.llm_compression.torch.qat_with_lora.main import set_trainable
@@ -46,6 +43,9 @@
from nncf.torch.function_hook.wrapper import get_hook_storage
from nncf.torch.quantization.layers import AsymmetricLoraNLSQuantizer
from nncf.torch.quantization.layers import SymmetricLoraNLSQuantizer
+from transformers import AutoModelForCausalLM
+from transformers import AutoTokenizer
+from transformers import get_cosine_schedule_with_warmup

warnings.filterwarnings("ignore", category=TracerWarning)

@@ -188,7 +188,10 @@ def lm_eval(model: nn.Module, tokenizer: AutoTokenizer, task: str, batch_size: i


def tokenize(
-    tokenizer: AutoTokenizer, prompt: str, add_eos_token: bool = True, max_length: int = 256
+    tokenizer: AutoTokenizer,
+    prompt: str,
+    add_eos_token: bool = True,
+    max_length: int = 256,
) -> dict[str, list[int]]:
    """
    Tokenize the given prompt.
@@ -324,7 +327,14 @@ def get_argument_parser() -> argparse.ArgumentParser:
    parser.add_argument(
        "--task",
        type=str,
-        choices=["openbookqa", "winogrande", "arc_challenge", "arc_easy", "gsm8k", "hellaswag"],
+        choices=[
+            "openbookqa",
+            "winogrande",
+            "arc_challenge",
+            "arc_easy",
+            "gsm8k",
+            "hellaswag",
+        ],
        default="openbookqa",
        help="Evaluation task",
    )
@@ -439,7 +449,11 @@ def main(argv) -> float:
    train_dataset = [tokenize(tokenizer, sample) for sample in train_dataset]
    random.shuffle(train_dataset)

-    model = compress_weights(model, dataset=Dataset([model_input]), **compression_config)
+    model = compress_weights(
+        model,
+        dataset=Dataset([{k: v.to(device) for k, v in model_input.items()}]),
+        **compression_config,
+    )
    results_of_compressed_model = lm_eval(model, tokenizer, task=args.task, batch_size=args.eval_batch_size)
    print(f"Results of NNCF compressed model={json.dumps(results_of_compressed_model, indent=4)}")
    overall_result["results_of_compressed_model"] = results_of_compressed_model
@@ -482,7 +496,9 @@ def main(argv) -> float:
    else:
        # Initialize the counter for tracking activation counts during training
        maximal_lora_rank_config = configure_lora_adapters(
-            layer_id_vs_lora_quantizers_map, lora_rank_space=args.lora_rank_space, adapter_strategy="maximal"
+            layer_id_vs_lora_quantizers_map,
+            lora_rank_space=args.lora_rank_space,
+            adapter_strategy="maximal",
        )
        activation_counter = [
            {rank: 0 for rank in args.lora_rank_space} for _ in range(len(maximal_lora_rank_config))
@@ -498,7 +514,9 @@ def main(argv) -> float:
        # configure the LoRA adapters with a random rank configuration from the specified rank space.
        if not disable_nls and grad_steps == 0:
            current_config = configure_lora_adapters(
-                layer_id_vs_lora_quantizers_map, lora_rank_space=args.lora_rank_space, adapter_strategy="random"
+                layer_id_vs_lora_quantizers_map,
+                lora_rank_space=args.lora_rank_space,
+                adapter_strategy="random",
            )
            # Update the activation counter
            for idx, rank in enumerate(current_config):
@@ -600,12 +618,16 @@ def get_top_k_min_loss_configs(loss_recorder, k=5):
                "results": results_of_nls_finetuned_compressed_model_median,
            }
        )
-        best_result = max(best_result, results_of_nls_finetuned_compressed_model_median[args.lm_eval_metric])
+        best_result = max(
+            best_result,
+            results_of_nls_finetuned_compressed_model_median[args.lm_eval_metric],
+        )

        # Test the most frequent configuration
        most_frequent_lora_rank_config = get_most_frequent_config(activation_counter)
        configure_lora_adapters(
-            layer_id_vs_lora_quantizers_map, specific_rank_config=most_frequent_lora_rank_config
+            layer_id_vs_lora_quantizers_map,
+            specific_rank_config=most_frequent_lora_rank_config,
        )
        results_of_nls_finetuned_compressed_model_most_frequent = lm_eval(
            model, tokenizer, task=args.task, batch_size=args.eval_batch_size
@@ -621,12 +643,18 @@ def get_top_k_min_loss_configs(loss_recorder, k=5):
                "results": results_of_nls_finetuned_compressed_model_most_frequent,
            }
        )
-        best_result = max(best_result, results_of_nls_finetuned_compressed_model_most_frequent[args.lm_eval_metric])
+        best_result = max(
+            best_result,
+            results_of_nls_finetuned_compressed_model_most_frequent[args.lm_eval_metric],
+        )

        # Test the top 5 min loss configurations
        top_5_min_loss_configs = get_top_k_min_loss_configs(loss_recorder, k=5)
        for i, min_loss_config in enumerate(top_5_min_loss_configs):
-            configure_lora_adapters(layer_id_vs_lora_quantizers_map, specific_rank_config=min_loss_config)
+            configure_lora_adapters(
+                layer_id_vs_lora_quantizers_map,
+                specific_rank_config=min_loss_config,
+            )
            results_of_nls_finetuned_compressed_model_min_loss = lm_eval(
                model, tokenizer, task=args.task, batch_size=args.eval_batch_size
            )
@@ -641,10 +669,16 @@ def get_top_k_min_loss_configs(loss_recorder, k=5):
                    "results": results_of_nls_finetuned_compressed_model_min_loss,
                }
            )
-            best_result = max(best_result, results_of_nls_finetuned_compressed_model_min_loss[args.lm_eval_metric])
+            best_result = max(
+                best_result,
+                results_of_nls_finetuned_compressed_model_min_loss[args.lm_eval_metric],
+            )
    else:
        assert args.custom_rank_config is not None, "Please provide `custom_rank_config` for evaluation."
-        configure_lora_adapters(layer_id_vs_lora_quantizers_map, specific_rank_config=args.custom_rank_config)
+        configure_lora_adapters(
+            layer_id_vs_lora_quantizers_map,
+            specific_rank_config=args.custom_rank_config,
+        )
        results_of_nls_finetuned_compressed_model_custom = lm_eval(
            model, tokenizer, task=args.task, batch_size=args.eval_batch_size
        )