78 | 78 | "!pip install -q flash-attn --no-build-isolation\n",
79 | 79 | "\n",
80 | 80 | "try:\n",
81 |    | - " from flash_attn.flash_attention import FlashAttention\n",
   | 81 | + " import flash_attn\n",
82 | 82 | " print(\"FlashAttention is installed\")\n",
83 | 83 | " USE_FLASH_ATTENTION = True\n",
84 | 84 | "except ImportError:\n",
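For context: the `USE_FLASH_ATTENTION` flag set in this cell is normally consumed later, when the model is loaded. A minimal sketch of that step, assuming a recent `transformers` release that accepts `attn_implementation` (the model class and checkpoint name are placeholders, not taken from this notebook):

```python
import torch
from transformers import AutoModelForVision2Seq

model_id = "org/vlm-checkpoint"  # placeholder; the notebook defines its own model_id

model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    # Use FlashAttention 2 kernels only when the import check above succeeded.
    attn_implementation="flash_attention_2" if USE_FLASH_ATTENTION else "eager",
)
```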
639 | 639 | "source": [
640 | 640 | "from transformers import BitsAndBytesConfig\n",
641 | 641 | "\n",
642 |     | - "USE_QLORA = False\n",
643 |     | - "USE_LORA = False\n",
    | 642 | + "USE_QLORA = True\n",
    | 643 | + "USE_LORA = True\n",
644 | 644 | "\n",
645 | 645 | "if USE_QLORA:\n",
646 | 646 | " # BitsAndBytesConfig int-4 config\n",
647 | 647 | " bnb_config = BitsAndBytesConfig(\n",
648 | 648 | " load_in_4bit=True,\n",
649 | 649 | " bnb_4bit_use_double_quant=True,\n",
650 | 650 | " bnb_4bit_quant_type=\"nf4\",\n",
651 |     | - " bnb_4bit_compute_dtype=torch.bfloat16\n",
    | 651 | + " bnb_4bit_compute_dtype=torch.bfloat16,\n",
    | 652 | + " llm_int8_skip_modules=[\"vision_tower\", \"lm_head\"], # Skip problematic modules\n",
    | 653 | + " llm_int8_enable_fp32_cpu_offload=True\n",
652 | 654 | " )\n",
653 | 655 | "else:\n",
654 | 656 | " bnb_config = None\n",
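For reference, `bnb_config` is typically handed to `from_pretrained` through the `quantization_config` argument; `llm_int8_skip_modules` keeps the listed submodules (the vision tower and the output head) in their original precision, and the fp32 CPU-offload flag assumes a device map is in use. A rough sketch under those assumptions (model class and checkpoint are placeholders):

```python
import torch
from transformers import AutoModelForVision2Seq

model_id = "org/vlm-checkpoint"  # placeholder

model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,  # None when USE_QLORA is False, i.e. a full-precision load
    device_map="auto",               # lets accelerate place layers and offload to CPU if needed
)
```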
693 | 695 | " r=8,\n",
694 | 696 | " lora_alpha=8,\n",
695 | 697 | " lora_dropout=0.1,\n",
696 |     | - " # target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],\n",
697 | 698 | " target_modules=[name for name, _ in model.named_modules() if 'language_model' in name and '_proj' in name],\n",
698 | 699 | " use_dora=True,\n",
699 | 700 | " init_lora_weights=\"gaussian\"\n",
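These arguments belong to a `peft.LoraConfig`, and the adapter is attached with `get_peft_model`. A minimal sketch of the surrounding cell, assuming the config object is called `lora_config`:

```python
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=8,
    lora_dropout=0.1,
    # Adapt every projection layer inside the language model,
    # leaving the vision encoder untouched.
    target_modules=[name for name, _ in model.named_modules()
                    if 'language_model' in name and '_proj' in name],
    use_dora=True,
    init_lora_weights="gaussian",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights should be trainable
```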
1052 | 1053 | "outputs": [],
1053 | 1054 | "source": [
1054 | 1055 | "if USE_LORA:\n",
1055 |      | - " model = model.merge_and_unload().to(torch.bfloat16)"
     | 1056 | + " from peft import PeftModel\n",
     | 1057 | + " model = PeftModel.from_pretrained(model, training_args.output_dir)"
1056 | 1058 | ]
1057 | 1059 | },
1058 | 1060 | {
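Reloading the adapter with `PeftModel.from_pretrained` keeps the LoRA weights separate from the base model; the `merge_and_unload()` call that was removed can still be applied afterwards if a standalone, adapter-free checkpoint is wanted. A short sketch (output paths are placeholders):

```python
if USE_LORA:
    # Optional: fold the LoRA/DoRA weights back into the base model for plain inference.
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained("merged-model")            # placeholder local path
    # merged_model.push_to_hub("your-username/your-model")  # placeholder Hub repo id
```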