diff --git a/.gitignore b/.gitignore
index afb9846..7ab17eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,9 +128,16 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
+# Cog/Replicate generated files
+test
+.cog
+diffusers-cache
+cog_out
+
 # Extra stuff to ignore
 dreams
 images
 run.py
 test_outputs
-examples/music
\ No newline at end of file
+examples/music
+.vscode
diff --git a/README.md b/README.md
index 6a7c990..91effca 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 
 Try it yourself in Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/stable-diffusion-videos/blob/main/stable_diffusion_videos.ipynb)
 
+Try it on Replicate: [![Replicate](https://replicate.com/nateraw/stable-diffusion-videos/badge)](https://replicate.com/nateraw/stable-diffusion-videos)
+
 TPU version (~x6 faster than standard colab GPUs): [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/stable-diffusion-videos/blob/main/flax_stable_diffusion_videos.ipynb)
 
 **Example** - morphing between "blueberry spaghetti" and "strawberry spaghetti"
@@ -123,6 +125,19 @@ interface = Interface(pipeline)
 interface.launch()
 ```
 
+#### Run with cog
+
+First, download the pre-trained weights:
+```bash
+cog run scripts/download_weights
+```
+
+Then run a prediction, separating each prompt with `|`:
+```bash
+cog predict -i prompts="a cat | a dog | a horse"
+```
+
 ## Credits
 
 This work built off of [a script](https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355
diff --git a/cog.yaml b/cog.yaml
new file mode 100644
index 0000000..51b2533
--- /dev/null
+++ b/cog.yaml
@@ -0,0 +1,22 @@
+build:
+  gpu: true
+  cuda: "11.6.2"
+  python_version: "3.10"
+  python_packages:
+    - "torch==1.13.1 --extra-index-url=https://download.pytorch.org/whl/cu116"
+    - "ftfy==6.1.1"
+    - "scipy==1.9.0"
+    - "transformers==4.21.1"
+    - "diffusers==0.11.1"
+    - "fire==0.4.0"
+    - "ipython==8.5.0"
+    - "gradio"
+    - "librosa"
+    - "av<10.0.0"
+    - "realesrgan==0.2.5.0"
+    - "accelerate==0.15.0"
+  run:
+    - apt-get update && apt-get install -y software-properties-common
+    - add-apt-repository ppa:ubuntu-toolchain-r/test
+    - apt update -y && apt-get install ffmpeg -y
+predict: "predict.py:Predictor"
\ No newline at end of file
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..b7fae29
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,145 @@
+import os
+import shutil
+
+import torch
+from cog import BasePredictor, Input, Path
+from diffusers import DDIMScheduler, PNDMScheduler
+from diffusers.models import AutoencoderKL
+from diffusers.schedulers import LMSDiscreteScheduler
+
+from stable_diffusion_videos import StableDiffusionWalkPipeline
+
+MODEL_ID = "runwayml/stable-diffusion-v1-5"
+MODEL_VAE = "stabilityai/sd-vae-ft-ema"
+MODEL_CACHE = "diffusers-cache"
+
+
+class Predictor(BasePredictor):
+    def setup(self):
+        """Load the model into memory to make running multiple predictions efficient"""
+        print("Loading pipeline...")
+
+        vae = AutoencoderKL.from_pretrained(
+            MODEL_VAE, cache_dir=MODEL_CACHE, local_files_only=True
+        )
+
+        self.pipeline = StableDiffusionWalkPipeline.from_pretrained(
+            MODEL_ID,
+            vae=vae,
+            torch_dtype=torch.float16,
+            revision="fp16",
+            safety_checker=None,
+            cache_dir=MODEL_CACHE,
+            local_files_only=True,
+            scheduler=LMSDiscreteScheduler(
+                beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
+            ),
+        ).to("cuda")
+
+        default_scheduler = PNDMScheduler(
+            beta_start=0.00085, beta_end=0.012,
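+            # same betas as the LMS scheduler configured above (SD v1 defaults)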
beta_schedule="scaled_linear" + ) + ddim_scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + klms_scheduler = LMSDiscreteScheduler( + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" + ) + self.SCHEDULERS = dict( + default=default_scheduler, ddim=ddim_scheduler, klms=klms_scheduler + ) + + @torch.inference_mode() + @torch.cuda.amp.autocast() + def predict( + self, + prompts: str = Input( + description="Input prompts, separate each prompt with '|'.", + default="a cat | a dog | a horse", + ), + seeds: str = Input( + description="Random seed, separated with '|' to use different seeds for each of the prompt provided above. Leave blank to randomize the seed.", + default=None, + ), + scheduler: str = Input( + description="Choose the scheduler", + choices=["default", "ddim", "klms"], + default="klms", + ), + num_inference_steps: int = Input( + description="Number of denoising steps for each image generated from the prompt", + ge=1, + le=500, + default=50, + ), + guidance_scale: float = Input( + description="Scale for classifier-free guidance", ge=1, le=20, default=7.5 + ), + num_steps: int = Input( + description="Steps for generating the interpolation video. Recommended to set to 3 or 5 for testing, then up it to 60-200 for better results.", + default=50, + ), + fps: int = Input( + description="Frame rate for the video.", default=15, ge=5, le=60 + ), + ) -> Path: + """Run a single prediction on the model""" + + prompts = [p.strip() for p in prompts.split("|")] + if seeds is None: + print("Setting Random seeds.") + seeds = [int.from_bytes(os.urandom(2), "big") for s in range(len(prompts))] + else: + seeds = [s.strip() for s in seeds.split("|")] + for s in seeds: + assert s.isdigit(), "Please provide integer seed." 
+            seeds = [int(s) for s in seeds]
+
+        if len(seeds) > len(prompts):
+            seeds = seeds[: len(prompts)]
+        elif len(seeds) < len(prompts):
+            print("Setting random seeds for the remaining prompts.")
+            seeds = seeds + [
+                int.from_bytes(os.urandom(2), "big")
+                for _ in range(len(prompts) - len(seeds))
+            ]
+
+        print("Seeds used for prompts are:")
+        for prompt, seed in zip(prompts, seeds):
+            print(f"{prompt}: {seed}")
+
+        # use the default settings for the demo
+        height = 512
+        width = 512
+        disable_tqdm = False
+
+        self.pipeline.set_progress_bar_config(disable=disable_tqdm)
+        self.pipeline.scheduler = self.SCHEDULERS[scheduler]
+
+        outdir = "cog_out/out"
+        if os.path.exists(outdir):
+            shutil.rmtree(outdir)
+        os.makedirs(outdir)
+
+        self.pipeline.walk(
+            prompts=prompts,
+            seeds=seeds,
+            num_interpolation_steps=num_steps,
+            output_dir="cog_out",
+            name="out",
+            fps=fps,
+            batch_size=1,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            margin=1.0,
+            smooth=0.2,
+            height=height,
+            width=width,
+        )
+
+        video_path = os.path.join(outdir, "out.mp4")
+        return Path(video_path)
diff --git a/scripts/download_weights.py b/scripts/download_weights.py
new file mode 100644
index 0000000..c867421
--- /dev/null
+++ b/scripts/download_weights.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+import os
+import shutil
+import sys
+
+from diffusers import StableDiffusionPipeline
+from diffusers.models import AutoencoderKL
+
+# append the project directory to the path so predict.py can be imported
+sys.path.append('.')
+
+from predict import MODEL_CACHE, MODEL_ID, MODEL_VAE
+
+# uncomment to force a clean re-download:
+# if os.path.exists(MODEL_CACHE):
+#     shutil.rmtree(MODEL_CACHE)
+os.makedirs(MODEL_CACHE, exist_ok=True)
+
+vae = AutoencoderKL.from_pretrained(
+    MODEL_VAE,
+    cache_dir=MODEL_CACHE,
+)
+
+pipe = StableDiffusionPipeline.from_pretrained(
+    MODEL_ID,
+    vae=vae,
+    cache_dir=MODEL_CACHE,
+    revision="fp16",
+)
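+
+# NOTE: predict.py loads the pipeline with local_files_only=True, so this script
+# must be run once (`cog run scripts/download_weights`) before the first prediction.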