diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..92042a1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,175 @@ +.glut/ +outputs/ +models/ +temp/ +checkpoints/ +results/ +*.bat +*.pdf +hotwords.txt +test.py +glut.py +VDM_Decoder/arguments.py + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/.project-root b/.project-root new file mode 100644 index 0000000..e69de29 diff --git a/MLLM/configs/clm_models/animegamer_clm.yaml b/MLLM/configs/clm_models/animegamer_clm.yaml index 0394553..42575b5 100644 --- a/MLLM/configs/clm_models/animegamer_clm.yaml +++ b/MLLM/configs/clm_models/animegamer_clm.yaml @@ -21,4 +21,4 @@ learnable_pos: False lm_loss_scale: 1.0 rec_loss_scale: 1.0 -pretrained_model_path: ./checkpoints/Game-Qiqi/MLLM/pytorch_model.bin +pretrained_model_path: ./checkpoints/AnimeGamer/MLLM-Qiqi/pytorch_model.bin diff --git a/README.md b/README.md index ef1d98f..23cf68b 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,6 @@ AnimeGamer is built upon Multimodal Large Language Models (MLLMs) to generate ea - [ ] Release wights of models trained on a mixture of anime films (the same setting as in our paper) ## 📏 Quick Start - -Please first download the checkpoints of [AnimeGamer](https://huggingface.co/TencentARC/AnimeGamer) and [Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), and save them under the folder `./checkpoints`. - To set up the environment for inference, you can run the following command: ```shell git clone https://github.com/TencentARC/AnimeGamer.git @@ -60,6 +57,10 @@ cd AnimeGamer conda create -n animegamer python==3.10 -y conda activate animegamer pip install -r requirements.txt +cd checkpoints +git clone https://huggingface.co/TencentARC/AnimeGamer +git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 +git clone https://huggingface.co/Gluttony10/CogVideoX-2b-sat ``` To generate action-aware multimodal representations and update character states, you can run: @@ -78,7 +79,7 @@ Change the instructions in `./game_demo` to customize your play. ## 🤗 Acknowledgements -We refer to [CogvideoX](https://github.com/XLabs-AI/x-flux) and [SEED-X](https://github.com/AILab-CVC/SEED-X/tree/main) to build our codebase. Thanks for their wonderful project. 
+We refer to [CogVideoX](https://github.com/THUDM/CogVideo) and [SEED-X](https://github.com/AILab-CVC/SEED-X) to build our codebase. Thanks for their wonderful projects. diff --git a/VDM_Decoder/configs/cogvideox_2b.yaml b/VDM_Decoder/configs/cogvideox_2b.yaml index 38f77ea..c82a24f 100644 --- a/VDM_Decoder/configs/cogvideox_2b.yaml +++ b/VDM_Decoder/configs/cogvideox_2b.yaml @@ -92,7 +92,7 @@ model: ucg_rate: 0.1 target: VDM_Decoder.sgm.modules.encoders.modules.FrozenT5Embedder params: - model_dir: "/group/40034/junhaocheng/CogVideo/CogVideoX-2b-sat/t5-v1_1-xxl" + model_dir: "./checkpoints/CogVideoX-2b-sat/t5-v1_1-xxl" max_length: 226 is_plan_3: True @@ -100,7 +100,7 @@ model: target: VDM_Decoder.vae_modules.autoencoder.VideoAutoencoderInferenceWrapper params: cp_size: 1 - ckpt_path: "/group/40034/junhaocheng/CogVideo/CogVideoX-2b-sat/vae/3d-vae.pt" + ckpt_path: "./checkpoints/CogVideoX-2b-sat/vae/3d-vae.pt" ignore_keys: [ 'loss' ] loss_config: diff --git a/VDM_Decoder/configs/inference.yaml b/VDM_Decoder/configs/inference.yaml index 7d207f6..2d8acd9 100644 --- a/VDM_Decoder/configs/inference.yaml +++ b/VDM_Decoder/configs/inference.yaml @@ -2,7 +2,7 @@ args: image2video: False latent_channels: 16 mode: inference - load: "./checkpoints/Game-Qiqi/VDM_Decoder" + load: "./checkpoints/AnimeGamer/VDM_Decoder-Qiqi" batch_size: 1 sampling_image_size: [480, 720] sampling_num_frames: 5 diff --git a/requirements.txt b/requirements.txt index ef24239..cf3e688 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,9 +27,9 @@ pillow==9.5.0 scikit-video safetensors tokenizers==0.19 -xformers -torch==2.4.0 -torchvision==0.19.0 +xformers==0.0.28.post2 +torch==2.5.0 +torchvision==0.20.0 wandb==0.17.5 pytorch_lightning==2.3.3 open_clip_torch==2.22.0 @@ -38,4 +38,7 @@ fsspec==2024.5.0 scipy==1.14.0 loguru SwissArmyTransformer==0.4.12 -omegaconf==2.3.0 \ No newline at end of file +omegaconf==2.3.0 +bitsandbytes +huggingface_hub[hf_xet] +triton \ No newline at end of file