Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ api = [
"pydantic-settings~=2.8",
"python-dotenv~=1.0",
]
video = [
"decord~=0.6.0",
"opencv-python-headless~=4.11",
]

# TODO: add project URLs
# [project.urls]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ gpu_ids: "0,1,2,3,4,5,6,7"
num_processes: 8 # should be the same as the number of GPUs

# gpu_ids: "0"
# num_processes: 1 # should be the same as the number of GPUs
# num_processes: 1

debug: false

distributed_type: DEEPSPEED
deepspeed_config:
deepspeed_config_file: ../configs/zero/zero3.yaml # e.g. configs/zero2.yaml; an absolute path is required
deepspeed_config_file: /path/to/configs/zero/zero2.yaml # example path; must be an absolute path
zero3_init_flag: false

downcast_bf16: 'no'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import argparse
import sys
from pathlib import Path

sys.path.append(str(Path(__file__).parent.parent))

from cogkit.finetune import get_model_cls

Expand Down
1 change: 0 additions & 1 deletion quickstart/scripts/train_ddp_t2i.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ CHECKPOINT_ARGS=(
VALIDATION_ARGS=(
--do_validation true # ["true", "false"]
--validation_steps 10 # should be a multiple of checkpointing_steps
--gen_fps 16
)

# Combine all arguments and launch training
Expand Down
2 changes: 1 addition & 1 deletion quickstart/scripts/train_zero_i2v.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ VALIDATION_ARGS=(
)

# Combine all arguments and launch training
accelerate launch --config_file ./accelerate_config.yaml train.py \
accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
"${MODEL_ARGS[@]}" \
"${OUTPUT_ARGS[@]}" \
"${DATA_ARGS[@]}" \
Expand Down
2 changes: 1 addition & 1 deletion quickstart/scripts/train_zero_t2i.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ VALIDATION_ARGS=(
)

# Combine all arguments and launch training
accelerate launch --config_file ./accelerate_config.yaml train.py \
accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
"${MODEL_ARGS[@]}" \
"${OUTPUT_ARGS[@]}" \
"${DATA_ARGS[@]}" \
Expand Down
2 changes: 1 addition & 1 deletion quickstart/scripts/train_zero_t2v.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ VALIDATION_ARGS=(
)

# Combine all arguments and launch training
accelerate launch --config_file ./accelerate_config.yaml train.py \
accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
"${MODEL_ARGS[@]}" \
"${OUTPUT_ARGS[@]}" \
"${DATA_ARGS[@]}" \
Expand Down
4 changes: 0 additions & 4 deletions src/cogkit/datasets/i2v_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@

from .utils import (
get_prompt_embedding,
load_images,
load_images_from_videos,
load_prompts,
load_videos,
preprocess_image_with_resize,
preprocess_video_with_resize,
)
Expand Down
6 changes: 2 additions & 4 deletions src/cogkit/datasets/t2i_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import torch
from accelerate.logging import get_logger
from datasets import load_dataset
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from typing_extensions import override

from cogmodels.finetune.diffusion.constants import LOG_LEVEL, LOG_NAME
from cogkit.finetune.diffusion.constants import LOG_LEVEL, LOG_NAME

from .utils import (
preprocess_image_with_resize,
Expand All @@ -20,7 +18,7 @@
)

if TYPE_CHECKING:
from cogmodels.finetune.diffusion.trainer import DiffusionTrainer
from cogkit.finetune.diffusion.trainer import DiffusionTrainer

# Must be imported after torch: importing it first can sometimes cause a nasty segmentation fault or stack-smashing error.
# Bug reports are rare, but it does happen. See the decord GitHub issues for more information.
Expand Down
7 changes: 3 additions & 4 deletions src/cogkit/datasets/t2v_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
from pathlib import Path
from typing import TYPE_CHECKING, Any

Expand All @@ -11,12 +10,12 @@
from torchvision import transforms
from typing_extensions import override

from cogmodels.finetune.diffusion.constants import LOG_LEVEL, LOG_NAME
from cogkit.finetune.diffusion.constants import LOG_LEVEL, LOG_NAME

from .utils import load_prompts, load_videos, preprocess_video_with_resize, get_prompt_embedding
from .utils import get_prompt_embedding, preprocess_video_with_resize

if TYPE_CHECKING:
from cogmodels.finetune.diffusion.trainer import DiffusionTrainer
from cogkit.finetune.diffusion.trainer import DiffusionTrainer

logger = get_logger(LOG_NAME, LOG_LEVEL)

Expand Down
1 change: 0 additions & 1 deletion src/cogkit/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from safetensors.torch import load_file, save_file

Expand Down
14 changes: 13 additions & 1 deletion src/cogkit/finetune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,19 @@

from cogkit.finetune.base import BaseTrainer

# import register first
from cogkit.finetune.register import get_model_cls, register, show_supported_models # noqa

# import registered models
from cogkit.finetune.diffusion import models as diffusion_models
from cogkit.finetune.llm import models as llm_models
from cogkit.finetune.register import get_model_cls, register, show_supported_models


__all__ = [
"BaseTrainer",
"diffusion_models",
"llm_models",
"get_model_cls",
"register",
"show_supported_models",
]
8 changes: 8 additions & 0 deletions src/cogkit/finetune/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,11 @@
from .base_component import BaseComponents
from .base_state import BaseState
from .base_trainer import BaseTrainer


__all__ = [
"BaseArgs",
"BaseComponents",
"BaseState",
"BaseTrainer",
]
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def validation_step(
Return the data that needs to be saved. For videos, the data format is List[PIL],
and for images, the data format is PIL
"""
prompt, prompt_embedding, image, video = (
prompt, prompt_embedding, image, _ = (
eval_data["prompt"],
eval_data["prompt_embedding"],
eval_data["image"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def validation_step(
Return the data that needs to be saved. For images, the data format is PIL
"""
prompt = eval_data["prompt"]
prompt_embedding = eval_data["prompt_embedding"]
_ = eval_data["prompt_embedding"]

image_generate = pipe(
height=self.state.train_resolution[0],
Expand Down
15 changes: 3 additions & 12 deletions src/cogkit/finetune/diffusion/trainer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import json
from typing import Any

Expand All @@ -7,29 +6,21 @@
from accelerate.utils import (
gather_object,
)
from diffusers.pipelines import DiffusionPipeline
from diffusers.utils.export_utils import export_to_video
from PIL import Image
from typing_extensions import override

from cogkit.datasets import I2VDatasetWithResize, T2IDatasetWithResize, T2VDatasetWithResize
from cogkit.finetune.base import BaseTrainer
from diffusers.pipelines import DiffusionPipeline
from diffusers.utils.export_utils import export_to_video

from ..utils import (
cast_training_params,
free_memory,
get_memory_statistics,
string_to_filename,
unload_model,
)
from .constants import LOG_LEVEL, LOG_NAME
from .diff_datasets import I2VDatasetWithResize, T2IDatasetWithResize, T2VDatasetWithResize
from .diff_datasets.utils import (
load_images,
load_prompts,
load_videos,
preprocess_image_with_resize,
preprocess_video_with_resize,
)
from .schemas import DiffusionArgs, DiffusionComponents, DiffusionState


Expand Down
12 changes: 7 additions & 5 deletions src/cogkit/finetune/register.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# -*- coding: utf-8 -*-


from typing import Literal
from typing import Literal, TYPE_CHECKING

from cogkit.finetune import BaseTrainer
# Import under TYPE_CHECKING only, to avoid a circular import at runtime
if TYPE_CHECKING:
from cogkit.finetune import BaseTrainer

SUPPORTED_MODELS: dict[str, dict[str, BaseTrainer]] = {}
SUPPORTED_MODELS: dict[str, dict[str, "BaseTrainer"]] = {}


def register(
model_name: str,
training_type: Literal["lora", "sft"],
trainer_cls: BaseTrainer,
trainer_cls: "BaseTrainer",
):
"""Register a model and its associated functions for a specific training type.

Expand Down Expand Up @@ -43,7 +45,7 @@ def show_supported_models():
print(f" • {training_type}")


def get_model_cls(model_type: str, training_type: Literal["lora", "sft"]) -> BaseTrainer:
def get_model_cls(model_type: str, training_type: Literal["lora", "sft"]) -> "BaseTrainer":
"""Get the trainer class for a specific model and training type."""
if model_type not in SUPPORTED_MODELS:
print(f"\nModel '{model_type}' is not supported.")
Expand Down
10 changes: 5 additions & 5 deletions src/cogkit/finetune/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .checkpointing import *
from .file_utils import *
from .memory_utils import *
from .optimizer_utils import *
from .torch_utils import *
from .checkpointing import * # noqa
from .file_utils import * # noqa
from .memory_utils import * # noqa
from .optimizer_utils import * # noqa
from .torch_utils import * # noqa
22 changes: 12 additions & 10 deletions tests/utils/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,26 @@
IMAGE_FILE = "dog.jpg"
VIDEO_FILE = "dog.mp4"


@pytest.mark.parametrize("model_id_or_path", [MODEL_ID])
def test_get_pipeline_meta(model_id_or_path):
    """Exercise utils.diffusion_pipeline: the metadata lookup must not return None."""
    pipeline_meta = diffusion_pipeline.get_pipeline_meta(model_id_or_path)
    assert pipeline_meta is not None

@pytest.mark.parametrize("dtypes", ["", "bfloat16", "float16"])
def test_cast_to_torch_dtype(dtypes):
    """Exercise utils.dtype: cast_to_torch_dtype on an empty name and on valid names.

    An empty string must raise ValueError whose message contains
    "Unknown data type"; any other name must appear in the string form of
    the returned value.
    """
    if dtypes == "":
        # Keep only the raising call inside the context manager: anything
        # placed after it in the same block would never execute.
        with pytest.raises(ValueError) as exc_info:
            dtype.cast_to_torch_dtype(dtypes)
        # Inspect the captured exception once the context has exited.
        assert "Unknown data type" in str(exc_info.value)
    else:
        res = dtype.cast_to_torch_dtype(dtypes)
        assert str(dtypes) in str(res)


@pytest.mark.parametrize("model_id_or_path", [MODEL_ID])
@pytest.mark.parametrize("task", ["t2i", "t2v", "i2v", "v2v"])
@pytest.mark.parametrize(
Expand All @@ -31,13 +34,10 @@ def test_cast_to_torch_dtype(dtypes):
[IMAGE_FILE, None],
[None, VIDEO_FILE],
[IMAGE_FILE, VIDEO_FILE],
])
],
)
# test utils.misc
def test_guess_generation_mode(
model_id_or_path,
task,
multimodel
):
def test_guess_generation_mode(model_id_or_path, task, multimodel):
image_file, video_file = multimodel
res = misc.guess_generation_mode(
model_id_or_path,
Expand All @@ -46,14 +46,16 @@ def test_guess_generation_mode(
video_file,
)
assert task == res.value



@pytest.mark.parametrize("save_path", ["CogVideoX-2b"])
def test_mkdir(save_path):
    """Exercise utils.path: mkdir on the given path must not return None."""
    result = path.mkdir(save_path)
    assert result is not None

@pytest.mark.parametrize("seed",[None, 42])

@pytest.mark.parametrize("seed", [None, 42])
# test utils.random
def test_rand_generator(seed):
res = random.rand_generator(seed)
Expand Down