From 5c733496c3e2d2db797465c7cd86dd7a15f29e5f Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 12:46:23 +0000
Subject: [PATCH 01/11] Update finetune_lora.sh

For Colab, run from the /content working directory
---
 scripts/finetune_lora.sh | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/scripts/finetune_lora.sh b/scripts/finetune_lora.sh
index fc02e09d7..c23339843 100644
--- a/scripts/finetune_lora.sh
+++ b/scripts/finetune_lora.sh
@@ -14,20 +14,22 @@
 # MODEL_VERSION="llama-2-7b-chat"
 ################## LLaMA-2 ##################

-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --lora_enable True \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
+deepspeed LLaVA/llava/train/train_mem.py \
+    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
+    --deepspeed LLaVA/scripts/zero3.json \
+    --model_name_or_path liuhaotian/llava-v1.5-13b \
+    --version v1 \
+    --data_path LLaVA/playground/data/llava_v1_5_mix665k.json \
+    --image_folder LLaVA/playground/data \
+    --vision_tower openai/clip-vit-large-patch14-336 \
+    --mm_projector_type mlp2x_gelu \
     --mm_vision_select_layer -2 \
     --mm_use_im_start_end False \
     --mm_use_im_patch_token False \
+    --image_aspect_ratio pad \
+    --group_by_modality_length True \
     --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
+    --output_dir LLaVA/checkpoints/llava-v1.5-13b-task-lora \
     --num_train_epochs 1 \
     --per_device_train_batch_size 16 \
     --per_device_eval_batch_size 4 \
@@ -36,7 +38,7 @@ deepspeed llava/train/train_mem.py \
     --save_strategy "steps" \
     --save_steps 50000 \
     --save_total_limit 1 \
-    --learning_rate 2e-5 \
+    --learning_rate 2e-4 \
     --weight_decay 0. \
     --warmup_ratio 0.03 \
     --lr_scheduler_type "cosine" \
@@ -44,6 +46,6 @@
     --tf32 True \
     --model_max_length 2048 \
     --gradient_checkpointing True \
-    --lazy_preprocess True \
     --dataloader_num_workers 4 \
+    --lazy_preprocess True \
     --report_to wandb

From 7c678312915964df881f8dcb9c9eeabffd7a6edc Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 12:48:01 +0000
Subject: [PATCH 02/11] Update train_mem.py

For Colab, run from the /content working directory
---
 llava/train/train_mem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llava/train/train_mem.py b/llava/train/train_mem.py
index 29ea06170..425a46f25 100644
--- a/llava/train/train_mem.py
+++ b/llava/train/train_mem.py
@@ -1,4 +1,4 @@
-from llava.train.train import train
+from train import train

 if __name__ == "__main__":
     train(attn_implementation="flash_attention_2")

From 096405c866f25a539aa38829c48814aa8fb3320e Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 13:14:37 +0000
Subject: [PATCH 03/11] Update train.py

Colab import adaptation
---
 llava/train/train.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/llava/train/train.py b/llava/train/train.py
index 477c668b6..c7dd8c8bd 100644
--- a/llava/train/train.py
+++ b/llava/train/train.py
@@ -15,6 +15,8 @@
 #    limitations under the License.
 import os
+import sys
+sys.path.append('/content/LLaVA/llava/')
 import copy
 from dataclasses import dataclass, field
 import json
@@ -27,13 +29,13 @@
 import transformers
 import tokenizers

-from llava.constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
+from constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
 from torch.utils.data import Dataset
-from llava.train.llava_trainer import LLaVATrainer
+from llava_trainer import LLaVATrainer

-from llava import conversation as conversation_lib
-from llava.model import *
-from llava.mm_utils import tokenizer_image_token
+import conversation as conversation_lib
+from model import *
+from mm_utils import tokenizer_image_token

 from PIL import Image

From 0f2e2150730e6b3ee37491ad6899a2ce69579a50 Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 13:15:28 +0000
Subject: [PATCH 04/11] Update mm_utils.py

Colab adaptations
---
 llava/mm_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llava/mm_utils.py b/llava/mm_utils.py
index de97345cf..7766737f5 100644
--- a/llava/mm_utils.py
+++ b/llava/mm_utils.py
@@ -5,9 +5,10 @@
 import math
 import ast
+import sys
+sys.path.append('/content/LLaVA/llava/')
 from transformers import StoppingCriteria
-from llava.constants import IMAGE_TOKEN_INDEX
-
+from constants import IMAGE_TOKEN_INDEX

 def select_best_resolution(original_size, possible_resolutions):
     """

From 72259998a4b42a3fa00af79354c5dfc0381d74d2 Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 17:58:48 +0000
Subject: [PATCH 05/11] Add finetune.ipynb

Tested in Colab (2024-03-23) on a T4 GPU, without bf16 or tf32
---
 finetune.ipynb | 274 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 274 insertions(+)
 create mode 100644 finetune.ipynb

diff --git a/finetune.ipynb b/finetune.ipynb
new file mode 100644
index 000000000..f4fd005e8
--- /dev/null
+++ b/finetune.ipynb
@@ -0,0 +1,274 @@
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "toc_visible": true,
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# pre-run"
      ],
      "metadata": {
        "id": "QkGSpvuMcCjQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# get ollama\n",
        "!sudo apt-get install -y pciutils\n",
        "!curl https://ollama.ai/install.sh | sh\n",
        "import pandas as pd\n",
        "\n",
        "# dependencies for fine-tuning\n",
        "!pip install accelerate -U -q\n",
        "!pip install deepspeed -q\n",
        "!pip install flash-attn --no-build-isolation -q # flash attention, also needed by the vision tower"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WlYzUFFmSlXd",
        "outputId": "9d6178cb-6710-45a5-96e9-3c3a9dd5816d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting flash-attn\n",
            "  Downloading flash_attn-2.5.6.tar.gz (2.5 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.2.1+cu121)\n", + "Collecting einops (from flash-attn)\n", + " Downloading einops-0.7.0-py3-none-any.whl (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from flash-attn) (24.0)\n", + "Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from flash-attn) (1.11.1.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.13.1)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.10.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2023.6.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (8.9.2.26)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.19.3)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", + "Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.2.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->flash-attn) (12.4.99)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n", + "Requirement already satisfied: mpmath>=0.19 in 
/usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n",
            "Building wheels for collected packages: flash-attn\n",
            "  Building wheel for flash-attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for flash-attn: filename=flash_attn-2.5.6-cp310-cp310-linux_x86_64.whl size=120592258 sha256=d8cf54adda65f59820221d329d274e124972d7fdc05ab3b1130253c64eee6c8a\n",
            "  Stored in directory: /root/.cache/pip/wheels/a8/1c/88/b959d6818b98a46d61ba231683abb7523b89ac1a7ed1e0c206\n",
            "Successfully built flash-attn\n",
            "Installing collected packages: einops, flash-attn\n",
            "Successfully installed einops-0.7.0 flash-attn-2.5.6\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "3EQAh7C5LIjP"
      },
      "outputs": [],
      "source": [
        "# clone my fork\n",
        "!git clone https://github.com/Tongmengxie/LLaVA.git\n",
        "\n",
        "# optional: clone the public repo once my pull request is merged\n",
        "# %cd LLaVA\n",
        "# !git clone https://github.com/haotian-liu/LLaVA.git\n",
        "# !git checkout colab\n",
        "# %cd ..\n",
        "# %pwd\n",
        "!mkdir LLaVA/checkpoints/llava-v1.5-13b-task-lora"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Input & Output Directories"
      ],
      "metadata": {
        "id": "r29HVHeBcEes"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# data_path\n",
        "# DATA_PATH = 'LLaVA/playground/data/llava_v1_5_mix665k.json'\n",
        "DATA_PATH = 'path_to_data' # see this doc for the expected format: https://github.com/haotian-liu/LLaVA/blob/main/docs/Finetune_Custom_Data.md\n",
        "\n",
        "# image_folder\n",
        "# IMAGE_FOLDER = 'LLaVA/playground/data'\n",
        "IMAGE_FOLDER = 'path_to_images'"
      ],
      "metadata": {
        "id": "zE4YhiPTcOlP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!deepspeed LLaVA/llava/train/train_mem.py \\\n",
        "    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \\\n",
        "    --deepspeed LLaVA/scripts/zero3.json \\\n",
        "    --model_name_or_path liuhaotian/llava-v1.5-13b \\\n",
        "    --version v1 \\\n",
        "    --data_path $DATA_PATH \\\n",
        "    --image_folder $IMAGE_FOLDER \\\n",
        "    --mm_projector_type mlp2x_gelu \\\n",
        "    --mm_vision_select_layer -2 \\\n",
        "    --mm_use_im_start_end False \\\n",
        "    --mm_use_im_patch_token False \\\n",
        "    --image_aspect_ratio pad \\\n",
        "    --group_by_modality_length True \\\n",
        "    --bf16 False \\\n",
        "    --output_dir LLaVA/checkpoints/llava-v1.5-13b-task-lora \\\n",
        "    --num_train_epochs 1 \\\n",
        "    --per_device_train_batch_size 16 \\\n",
        "    --per_device_eval_batch_size 4 \\\n",
        "    --gradient_accumulation_steps 1 \\\n",
        "    --evaluation_strategy \"no\" \\\n",
        "    --save_strategy \"steps\" \\\n",
        "    --save_steps 50000 \\\n",
        "    --save_total_limit 1 \\\n",
        "    --learning_rate 2e-4 \\\n",
        "    --weight_decay 0. 
\\\n", + " --warmup_ratio 0.03 \\\n", + " --lr_scheduler_type \"cosine\" \\\n", + " --logging_steps 1 \\\n", + " --tf32 False \\\n", + " --model_max_length 2048 \\\n", + " --gradient_checkpointing True \\\n", + " --dataloader_num_workers 4 \\\n", + " --lazy_preprocess True \\\n", + " --report_to wandb\n", + " # --vision_tower openai/clip-vit-large-patch14-336 \\" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ssSg2wZ9M9zc", + "outputId": "fc1eded3-71c4-467c-e3b0-07ed208f70fb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2024-03-23 17:49:06,369] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-03-23 17:49:10,197] [WARNING] [runner.py:202:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.\n", + "[2024-03-23 17:49:10,211] [INFO] [runner.py:568:main] cmd = /usr/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None LLaVA/llava/train/train_mem.py --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 --deepspeed LLaVA/scripts/zero3.json --model_name_or_path liuhaotian/llava-v1.5-13b --version v1 --data_path LLaVA/playground/data/llava_v1_5_mix665k.json --image_folder LLaVA/playground/data --mm_projector_type mlp2x_gelu --mm_vision_select_layer -2 --mm_use_im_start_end False --mm_use_im_patch_token False --image_aspect_ratio pad --group_by_modality_length True --bf16 False --output_dir LLaVA/checkpoints/llava-v1.5-13b-task-lora --num_train_epochs 1 --per_device_train_batch_size 16 --per_device_eval_batch_size 4 --gradient_accumulation_steps 1 --evaluation_strategy no --save_strategy steps --save_steps 50000 --save_total_limit 1 --learning_rate 2e-4 --weight_decay 0. 
--warmup_ratio 0.03 --lr_scheduler_type cosine --logging_steps 1 --tf32 False --model_max_length 2048 --gradient_checkpointing True --dataloader_num_workers 4 --lazy_preprocess True --report_to wandb\n", + "[2024-03-23 17:49:15,047] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-03-23 17:49:18,960] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.19.3-1+cuda12.2\n", + "[2024-03-23 17:49:18,960] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE_VERSION=2.19.3-1\n", + "[2024-03-23 17:49:18,960] [INFO] [launch.py:138:main] 0 NCCL_VERSION=2.19.3-1\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE=libnccl2=2.19.3-1+cuda12.2\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE_NAME=libnccl2\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:138:main] 0 NV_LIBNCCL_PACKAGE_VERSION=2.19.3-1\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:145:main] WORLD INFO DICT: {'localhost': [0]}\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:151:main] nnodes=1, num_local_procs=1, node_rank=0\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:162:main] global_rank_mapping=defaultdict(, {'localhost': [0]})\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:163:main] dist_world_size=1\n", + "[2024-03-23 17:49:18,961] [INFO] [launch.py:165:main] Setting CUDA_VISIBLE_DEVICES=0\n", + "[2024-03-23 17:49:18,962] [INFO] [launch.py:253:main] process 20970 spawned with command: ['/usr/bin/python3', '-u', 'LLaVA/llava/train/train_mem.py', '--local_rank=0', '--lora_enable', 'True', '--lora_r', '128', '--lora_alpha', '256', '--mm_projector_lr', '2e-5', '--deepspeed', 'LLaVA/scripts/zero3.json', '--model_name_or_path', 'liuhaotian/llava-v1.5-13b', '--version', 'v1', '--data_path', 'LLaVA/playground/data/llava_v1_5_mix665k.json', '--image_folder', 'LLaVA/playground/data', '--mm_projector_type', 'mlp2x_gelu', '--mm_vision_select_layer', '-2', '--mm_use_im_start_end', 'False', '--mm_use_im_patch_token', 'False', '--image_aspect_ratio', 'pad', '--group_by_modality_length', 'True', '--bf16', 'False', '--output_dir', 'LLaVA/checkpoints/llava-v1.5-13b-task-lora', '--num_train_epochs', '1', '--per_device_train_batch_size', '16', '--per_device_eval_batch_size', '4', '--gradient_accumulation_steps', '1', '--evaluation_strategy', 'no', '--save_strategy', 'steps', '--save_steps', '50000', '--save_total_limit', '1', '--learning_rate', '2e-4', '--weight_decay', '0.', '--warmup_ratio', '0.03', '--lr_scheduler_type', 'cosine', '--logging_steps', '1', '--tf32', 'False', '--model_max_length', '2048', '--gradient_checkpointing', 'True', '--dataloader_num_workers', '4', '--lazy_preprocess', 'True', '--report_to', 'wandb']\n", + "2024-03-23 17:49:23.673937: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-03-23 17:49:23.673984: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-03-23 17:49:23.675371: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-03-23 
17:49:24.858169: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "[2024-03-23 17:49:26,104] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", + "[2024-03-23 17:49:26,349] [INFO] [comm.py:637:init_distributed] cdb=None\n", + "[2024-03-23 17:49:26,349] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n", + "You are using a model of type llava to instantiate a model of type llama. This is not supported for all configurations of models and can yield errors.\n", + "You are attempting to use Flash Attention 2.0 without specifying a torch dtype. This might lead to unexpected behaviour\n", + "You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.\n", + "Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained(\"openai/whisper-tiny\", attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16)`\n", + "Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained(\"openai/whisper-tiny\", attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16)`\n", + "[2024-03-23 17:49:27,941] [INFO] [partition_parameters.py:343:__exit__] finished initializing model - num_params = 97, num_elems = 3.58B\n", + "Traceback (most recent call last):\n", + " File \"/content/LLaVA/llava/train/train_mem.py\", line 4, in \n", + " train(attn_implementation=\"flash_attention_2\")\n", + " File \"/content/LLaVA/llava/train/train.py\", line 837, in train\n", + " model = transformers.LlamaForCausalLM.from_pretrained(\n", + " File \"/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\", line 3375, in from_pretrained\n", + " model = cls(config, *model_args, **model_kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 503, in wrapper\n", + " f(module, *args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 1103, in __init__\n", + " self.model = LlamaModel(config)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 503, in wrapper\n", + " f(module, *args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 924, in __init__\n", + " [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]\n", + " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 924, in \n", + " [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 503, in wrapper\n", + " f(module, *args, 
**kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 701, in __init__\n", + " self.mlp = LlamaMLP(config)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 503, in wrapper\n", + " f(module, *args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 219, in __init__\n", + " self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 513, in wrapper\n", + " self._post_init_method(module)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 1056, in _post_init_method\n", + " self._zero_init_param(param)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 1015, in _zero_init_param\n", + " param.partition()\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 1326, in partition\n", + " self._partition(param_list, has_been_updated=has_been_updated)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 1475, in _partition\n", + " self._partition_param(param, has_been_updated=has_been_updated)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/utils/nvtx.py\", line 15, in wrapped_fn\n", + " ret_val = func(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 1540, in _partition_param\n", + " partitioned_tensor = torch.empty(partition_size, dtype=param.dtype, device=device)\n", + " File \"/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/zero/partition_parameters.py\", line 238, in wrapped_fn\n", + " tensor: Tensor = fn(*args, **kwargs)\n", + "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 270.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 65.06 MiB is free. Process 192148 has 14.68 GiB memory in use. Of the allocated memory 13.39 GiB is allocated by PyTorch, and 883.60 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n",
            "[2024-03-23 17:49:29,972] [INFO] [launch.py:316:sigkill_handler] Killing subprocess 20970\n",
            "[2024-03-23 17:49:29,972] [ERROR] [launch.py:322:sigkill_handler] ['/usr/bin/python3', '-u', 'LLaVA/llava/train/train_mem.py', '--local_rank=0', '--lora_enable', 'True', '--lora_r', '128', '--lora_alpha', '256', '--mm_projector_lr', '2e-5', '--deepspeed', 'LLaVA/scripts/zero3.json', '--model_name_or_path', 'liuhaotian/llava-v1.5-13b', '--version', 'v1', '--data_path', 'LLaVA/playground/data/llava_v1_5_mix665k.json', '--image_folder', 'LLaVA/playground/data', '--mm_projector_type', 'mlp2x_gelu', '--mm_vision_select_layer', '-2', '--mm_use_im_start_end', 'False', '--mm_use_im_patch_token', 'False', '--image_aspect_ratio', 'pad', '--group_by_modality_length', 'True', '--bf16', 'False', '--output_dir', 'LLaVA/checkpoints/llava-v1.5-13b-task-lora', '--num_train_epochs', '1', '--per_device_train_batch_size', '16', '--per_device_eval_batch_size', '4', '--gradient_accumulation_steps', '1', '--evaluation_strategy', 'no', '--save_strategy', 'steps', '--save_steps', '50000', '--save_total_limit', '1', '--learning_rate', '2e-4', '--weight_decay', '0.', '--warmup_ratio', '0.03', '--lr_scheduler_type', 'cosine', '--logging_steps', '1', '--tf32', 'False', '--model_max_length', '2048', '--gradient_checkpointing', 'True', '--dataloader_num_workers', '4', '--lazy_preprocess', 'True', '--report_to', 'wandb'] exits with return code = 1\n"
          ]
        }
      ]
    }
  ]
}
\ No newline at end of file

From bb3aa1232679a0e762e0df8c75248940543f38fa Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 18:00:29 +0000
Subject: [PATCH 06/11] Runs on Colab T4 GPU, without bf16 or tf32

---
 finetune.ipynb | 182 ++++++++++++++++++++++++++-----------------------
 1 file changed, 98 insertions(+), 84 deletions(-)

diff --git a/finetune.ipynb b/finetune.ipynb
index f4fd005e8..1422d990d 100644
--- a/finetune.ipynb
+++ b/finetune.ipynb
@@ -1,44 +1,31 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "toc_visible": true,
-      "gpuType": "T4"
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
   "cells": [
     {
       "cell_type": "markdown",
+      "metadata": {},
       "source": [
-        "# pre-run"
-      ],
+        "Tested in Colab (2024-03-23) on a T4 GPU, without bf16 or tf32"
      ]
    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Author: https://github.com/TongmengXie/"
+      ]
+    },
+    {
+      "cell_type": "markdown",
       "metadata": {
         "id": "QkGSpvuMcCjQ"
-      }
+      },
+      "source": [
+        "# pre-run"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "# get ollama\n",
-        "!sudo apt-get install -y pciutils\n",
-        "!curl https://ollama.ai/install.sh | sh\n",
-        "import pandas as pd\n",
-        "\n",
-        "# dependencies for fine-tuning\n",
-        "!pip install accelerate -U -q\n",
-        "!pip install deepspeed -q\n",
-        "!pip install flash-attn --no-build-isolation -q # flash attention, also needed by the vision tower"
-      ],
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "WlYzUFFmSlXd",
         "outputId": "9d6178cb-6710-45a5-96e9-3c3a9dd5816d"
       },
-      "execution_count": null,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Collecting flash-attn\n",
             "  Downloading 
flash_attn-2.5.6.tar.gz (2.5 MB)\n",
@@ -92,6 +78,17 @@
             "Successfully installed einops-0.7.0 flash-attn-2.5.6\n"
           ]
         }
+      ],
+      "source": [
+        "# get ollama\n",
+        "!sudo apt-get install -y pciutils\n",
+        "!curl https://ollama.ai/install.sh | sh\n",
+        "import pandas as pd\n",
+        "\n",
+        "# dependencies for fine-tuning\n",
+        "!pip install accelerate -U -q\n",
+        "!pip install deepspeed -q\n",
+        "!pip install flash-attn --no-build-isolation -q # flash attention, also needed by the vision tower"
       ]
     },
     {
@@ -116,15 +113,20 @@
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "# Input & Output Directories"
-      ],
       "metadata": {
         "id": "r29HVHeBcEes"
-      }
+      },
+      "source": [
+        "# Input & Output Directories"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "zE4YhiPTcOlP"
+      },
+      "outputs": [],
       "source": [
         "# data_path\n",
         "# DATA_PATH = 'LLaVA/playground/data/llava_v1_5_mix665k.json'\n",
         "DATA_PATH = 'path_to_data' # see this doc for the expected format: https://github.com/haotian-liu/LLaVA/blob/main/docs/Finetune_Custom_Data.md\n",
         "\n",
         "# image_folder\n",
         "# IMAGE_FOLDER = 'LLaVA/playground/data'\n",
         "IMAGE_FOLDER = 'path_to_images'"
-      ],
-      "metadata": {
-        "id": "zE4YhiPTcOlP"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "!deepspeed LLaVA/llava/train/train_mem.py \\\n",
-        "    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \\\n",
-        "    --deepspeed LLaVA/scripts/zero3.json \\\n",
-        "    --model_name_or_path liuhaotian/llava-v1.5-13b \\\n",
-        "    --version v1 \\\n",
-        "    --data_path $DATA_PATH \\\n",
-        "    --image_folder $IMAGE_FOLDER \\\n",
-        "    --mm_projector_type mlp2x_gelu \\\n",
-        "    --mm_vision_select_layer -2 \\\n",
-        "    --mm_use_im_start_end False \\\n",
-        "    --mm_use_im_patch_token False \\\n",
-        "    --image_aspect_ratio pad \\\n",
-        "    --group_by_modality_length True \\\n",
-        "    --bf16 False \\\n",
-        "    --output_dir LLaVA/checkpoints/llava-v1.5-13b-task-lora \\\n",
-        "    --num_train_epochs 1 \\\n",
-        "    --per_device_train_batch_size 16 \\\n",
-        "    --per_device_eval_batch_size 4 \\\n",
-        "    --gradient_accumulation_steps 1 \\\n",
-        "    --evaluation_strategy \"no\" \\\n",
-        "    --save_strategy \"steps\" \\\n",
-        "    --save_steps 50000 \\\n",
-        "    --save_total_limit 1 \\\n",
-        "    --learning_rate 2e-4 \\\n",
-        "    --weight_decay 0. 
\\\n", - " --warmup_ratio 0.03 \\\n", - " --lr_scheduler_type \"cosine\" \\\n", - " --logging_steps 1 \\\n", - " --tf32 False \\\n", - " --model_max_length 2048 \\\n", - " --gradient_checkpointing True \\\n", - " --dataloader_num_workers 4 \\\n", - " --lazy_preprocess True \\\n", - " --report_to wandb\n", - " # --vision_tower openai/clip-vit-large-patch14-336 \\" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -186,11 +147,10 @@ "id": "ssSg2wZ9M9zc", "outputId": "fc1eded3-71c4-467c-e3b0-07ed208f70fb" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "[2024-03-23 17:49:06,369] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "[2024-03-23 17:49:10,197] [WARNING] [runner.py:202:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.\n", @@ -268,7 +228,61 @@ "[2024-03-23 17:49:29,972] [ERROR] [launch.py:322:sigkill_handler] ['/usr/bin/python3', '-u', 'LLaVA/llava/train/train_mem.py', '--local_rank=0', '--lora_enable', 'True', '--lora_r', '128', '--lora_alpha', '256', '--mm_projector_lr', '2e-5', '--deepspeed', 'LLaVA/scripts/zero3.json', '--model_name_or_path', 'liuhaotian/llava-v1.5-13b', '--version', 'v1', '--data_path', 'LLaVA/playground/data/llava_v1_5_mix665k.json', '--image_folder', 'LLaVA/playground/data', '--mm_projector_type', 'mlp2x_gelu', '--mm_vision_select_layer', '-2', '--mm_use_im_start_end', 'False', '--mm_use_im_patch_token', 'False', '--image_aspect_ratio', 'pad', '--group_by_modality_length', 'True', '--bf16', 'False', '--output_dir', 'LLaVA/checkpoints/llava-v1.5-13b-task-lora', '--num_train_epochs', '1', '--per_device_train_batch_size', '16', '--per_device_eval_batch_size', '4', '--gradient_accumulation_steps', '1', '--evaluation_strategy', 'no', '--save_strategy', 'steps', '--save_steps', '50000', '--save_total_limit', '1', '--learning_rate', '2e-4', '--weight_decay', '0.', '--warmup_ratio', '0.03', '--lr_scheduler_type', 'cosine', '--logging_steps', '1', '--tf32', 'False', '--model_max_length', '2048', '--gradient_checkpointing', 'True', '--dataloader_num_workers', '4', '--lazy_preprocess', 'True', '--report_to', 'wandb'] exits with return code = 1\n" ] } + ], + "source": [ + "!deepspeed LLaVA/llava/train/train_mem.py \\\n", + " --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \\\n", + " --deepspeed LLaVA/scripts/zero3.json \\\n", + " --model_name_or_path liuhaotian/llava-v1.5-13b \\\n", + " --version v1 \\\n", + " --data_path DATA_PATH \\\n", + " --image_folder IMAGE_FOLDER \\\n", + " --mm_projector_type mlp2x_gelu \\\n", + " --mm_vision_select_layer -2 \\\n", + " --mm_use_im_start_end False \\\n", + " --mm_use_im_patch_token False \\\n", + " --image_aspect_ratio pad \\\n", + " --group_by_modality_length True \\\n", + " --bf16 False \\\n", + " --output_dir LLaVA/checkpoints/llava-v1.5-13b-task-lora \\\n", + " --num_train_epochs 1 \\\n", + " --per_device_train_batch_size 16 \\\n", + " --per_device_eval_batch_size 4 \\\n", + " --gradient_accumulation_steps 1 \\\n", + " --evaluation_strategy \"no\" \\\n", + " --save_strategy \"steps\" \\\n", + " --save_steps 50000 \\\n", + " --save_total_limit 1 \\\n", + " --learning_rate 2e-4 \\\n", + " --weight_decay 0. 
\\\n", + " --warmup_ratio 0.03 \\\n", + " --lr_scheduler_type \"cosine\" \\\n", + " --logging_steps 1 \\\n", + " --tf32 False \\\n", + " --model_max_length 2048 \\\n", + " --gradient_checkpointing True \\\n", + " --dataloader_num_workers 4 \\\n", + " --lazy_preprocess True \\\n", + " --report_to wandb\n", + " # --vision_tower openai/clip-vit-large-patch14-336 \\" ] } - ] -} \ No newline at end of file + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 5b4482434614a6168063d9d32e3e4fb45822ed70 Mon Sep 17 00:00:00 2001 From: TongmengXie <114957703+TongmengXie@users.noreply.github.com> Date: Sat, 23 Mar 2024 18:05:54 +0000 Subject: [PATCH 07/11] Update finetune_lora.sh From 2ee12a5a868984e68435a3f8e2376f946dc2487b Mon Sep 17 00:00:00 2001 From: TongmengXie <114957703+TongmengXie@users.noreply.github.com> Date: Sat, 23 Mar 2024 18:13:45 +0000 Subject: [PATCH 08/11] Update finetune_lora.sh --- scripts/finetune_lora.sh | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/scripts/finetune_lora.sh b/scripts/finetune_lora.sh index c23339843..c2ed4c030 100644 --- a/scripts/finetune_lora.sh +++ b/scripts/finetune_lora.sh @@ -14,22 +14,21 @@ # MODEL_VERSION="llama-2-7b-chat" ################## LLaMA-2 ################## -!deepspeed LLaVA/llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed LLaVA/scripts/zero3.json \ - --model_name_or_path liuhaotian/llava-v1.5-13b \ - --version v1 \ - --data_path LLaVA/playground/data/llava_v1_5_mix665k.json \ - --image_folder LLaVA/playground/data \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --mm_projector_type mlp2x_gelu \ +deepspeed llava/train/train_mem.py \ + --deepspeed ./scripts/zero2.json \ + --lora_enable True \ + --bits 4 \ + --model_name_or_path ./checkpoints/$MODEL_VERSION \ + --version $PROMPT_VERSION \ + --data_path ./playground/data/llava_instruct_80k.json \ + --image_folder /path/to/coco/train2017 \ + --vision_tower openai/clip-vit-large-patch14 \ + --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \ --mm_vision_select_layer -2 \ --mm_use_im_start_end False \ --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ --bf16 True \ - --output_dir LLaVA/checkpoints/llava-v1.5-13b-task-lora \ + --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \ --num_train_epochs 1 \ --per_device_train_batch_size 16 \ --per_device_eval_batch_size 4 \ @@ -38,7 +37,7 @@ --save_strategy "steps" \ --save_steps 50000 \ --save_total_limit 1 \ - --learning_rate 2e-4 \ + --learning_rate 2e-5 \ --weight_decay 0. 
\
     --warmup_ratio 0.03 \
     --lr_scheduler_type "cosine" \
@@ -46,6 +45,6 @@
     --tf32 True \
     --model_max_length 2048 \
     --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
     --lazy_preprocess True \
+    --dataloader_num_workers 4 \
     --report_to wandb

From e799ceaa56adfc8ddb44bf79ad398891314dc283 Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 18:16:05 +0000
Subject: [PATCH 09/11] Paths for the Colab file system structure

---
 llava/mm_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llava/mm_utils.py b/llava/mm_utils.py
index 7766737f5..374829843 100644
--- a/llava/mm_utils.py
+++ b/llava/mm_utils.py
@@ -6,7 +6,11 @@
 import ast

 import sys
-sys.path.append('/content/LLaVA/llava/')
+try:
+    sys.path.append('/content/LLaVA/llava/')
+except Exception:
+    pass
+
 from transformers import StoppingCriteria
 from constants import IMAGE_TOKEN_INDEX

From c8aa2f73c3676fae2b6c3214806ebae146feaea0 Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 18:16:26 +0000
Subject: [PATCH 10/11] Update train.py

---
 llava/train/train.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llava/train/train.py b/llava/train/train.py
index c7dd8c8bd..b2552360f 100644
--- a/llava/train/train.py
+++ b/llava/train/train.py
@@ -16,7 +16,10 @@

 import os
 import sys
-sys.path.append('/content/LLaVA/llava/')
+try:
+    sys.path.append('/content/LLaVA/llava/')
+except Exception:
+    pass
 import copy
 from dataclasses import dataclass, field
 import json

From 3963210b2a957472cf0bfd3470c9b61dc6aeb1e2 Mon Sep 17 00:00:00 2001
From: TongmengXie <114957703+TongmengXie@users.noreply.github.com>
Date: Sat, 23 Mar 2024 18:17:45 +0000
Subject: [PATCH 11/11] Update train_mem.py

---
 llava/train/train_mem.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llava/train/train_mem.py b/llava/train/train_mem.py
index 425a46f25..048913afc 100644
--- a/llava/train/train_mem.py
+++ b/llava/train/train_mem.py
@@ -1,4 +1,8 @@
-from train import train
+
+try:
+    from llava.train.train import train
+except ImportError:  # e.g., on Colab
+    from train import train

 if __name__ == "__main__":
     train(attn_implementation="flash_attention_2")
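
Note on the import shims in patches 09-11: they all implement one dual-mode pattern, namely prefer the installed llava package layout, and fall back to flat-checkout imports when the repo is only cloned, as on Colab. Below is a minimal self-contained sketch of that pattern, not code from the patches themselves; the /content/LLaVA path is the Colab clone location assumed throughout this series (upstream LLaVA does not define it), and the path-probing loop is illustrative.

    # Sketch of the dual-mode import pattern from patches 09-11.
    # Assumes a Colab checkout at /content/LLaVA; adjust REPO_ROOT elsewhere.
    import os
    import sys

    REPO_ROOT = "/content/LLaVA"                # assumed Colab clone location
    PKG_DIR = os.path.join(REPO_ROOT, "llava")  # flat-layout import root

    # Make both layouts importable without installing the package.
    for path in (REPO_ROOT, PKG_DIR):
        if os.path.isdir(path) and path not in sys.path:
            sys.path.append(path)

    try:
        # Package layout: resolves when LLaVA is installed (e.g. pip install -e .).
        from llava.train.train import train
    except ImportError:
        # Flat layout: resolves when /content/LLaVA/llava is on sys.path (Colab).
        from train import train

    if __name__ == "__main__":
        train(attn_implementation="flash_attention_2")

An alternative that avoids sys.path edits entirely is running "pip install -e LLaVA" once in the notebook, after which the unmodified "from llava..." imports resolve on Colab as well. Separately, the --bits 4 flag added in patch 08 loads the base model quantized, and is plausibly what lets a 16 GiB T4 avoid the out-of-memory failure recorded above, where ZeRO-3 runs out of memory while partitioning full-precision weights of the 13B model.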