feat: Update Tile Pre-Processor to support more modes #6558

Open · wants to merge 4 commits into main

Changes from 3 commits
77 changes: 58 additions & 19 deletions invokeai/app/invocations/controlnet_image_processors.py
@@ -1,9 +1,10 @@
 # Invocations for ControlNet image preprocessors
 # initial implementation by Gregg Helt, 2023
 # heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux
+import random
 from builtins import bool, float
 from pathlib import Path
-from typing import Dict, List, Literal, Union
+from typing import Any, Dict, List, Literal, Union
 
 import cv2
 import numpy as np
@@ -39,6 +40,7 @@
 from invokeai.backend.image_util.canny import get_canny_edges
 from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
 from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
+from invokeai.backend.image_util.fast_guided_filter.fast_guided_filter import FastGuidedFilter
 from invokeai.backend.image_util.hed import HEDProcessor
 from invokeai.backend.image_util.lineart import LineartProcessor
 from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
@@ -483,30 +485,67 @@ class TileResamplerProcessorInvocation(ImageProcessorInvocation):
 
     # res: int = InputField(default=512, ge=0, le=1024, description="The pixel resolution for each tile")
     down_sampling_rate: float = InputField(default=1.0, ge=1.0, le=8.0, description="Down sampling rate")
+    mode: Literal["regular", "blur", "var", "super"] = InputField(
+        default="regular", description="The controlnet tile model being used"
+    )
+
+    def apply_gaussian_blur(self, image_np: np.ndarray[Any, Any], ksize: int = 5, sigmaX: float = 1.0):
+        if ksize % 2 == 0:
+            ksize += 1  # ksize must be odd
+        blurred_image = cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigmaX)
+        return blurred_image
+
+    def apply_guided_filter(self, image_np: np.ndarray[Any, Any], radius: int, eps: float, scale: int):
+        filter = FastGuidedFilter(image_np, radius, eps, scale)
+        return filter.filter(image_np)
+
+    # based off https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic
+    def tile_resample(self, np_img: np.ndarray[Any, Any]):
+        height, width, _ = np_img.shape
+
+        if self.mode == "regular":
+            np_img = HWC3(np_img)
+            if self.down_sampling_rate < 1.1:
Collaborator: I realize that you didn't change this code, but do you know why 1.1 was chosen as the cut-off? It seems like we intentionally chose to allow float down-sampling rates, implying that a value such as 1.07 could be a valid down-sampling rate.

Collaborator (Author): I am not sure why this was done originally. I'll need to check.

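For reference, here is a minimal standalone sketch (hypothetical sizes, not part of the PR) of what the 1.1 cut-off does: any rate in the open interval (1.0, 1.1) is treated exactly like 1.0.

```python
# Minimal illustration of the < 1.1 cut-off discussed above (hypothetical values).
down_sampling_rate = 1.07
width, height = 1000, 1000

if down_sampling_rate < 1.1:
    new_size = (width, height)  # early return: the image is passed through untouched
else:
    new_size = (int(width / down_sampling_rate), int(height / down_sampling_rate))

print(new_size)  # (1000, 1000) -- a rate of 1.07 behaves exactly like 1.0
```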
+                return np_img
 
-    # tile_resample copied from sd-webui-controlnet/scripts/processor.py
-    def tile_resample(
-        self,
-        np_img: np.ndarray,
-        res=512,  # never used?
-        down_sampling_rate=1.0,
-    ):
-        np_img = HWC3(np_img)
-        if down_sampling_rate < 1.1:
-            return np_img
-        H, W, C = np_img.shape
-        H = int(float(H) / float(down_sampling_rate))
-        W = int(float(W) / float(down_sampling_rate))
-        np_img = cv2.resize(np_img, (W, H), interpolation=cv2.INTER_AREA)
-        return np_img
+            new_height = int(float(height) / float(self.down_sampling_rate))
+            new_width = int(float(width) / float(self.down_sampling_rate))
+            np_img = cv2.resize(np_img, (new_width, new_height), interpolation=cv2.INTER_AREA)
+            return np_img

+        ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
+
+        resize_w, resize_h = int(width * ratio), int(height * ratio)
+
+        if self.mode == "super":
+            resize_w, resize_h = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
+
+        np_img = cv2.resize(np_img, (resize_w, resize_h))
+
+        if self.mode == "blur":
+            blur_strength = random.sample([i / 10.0 for i in range(10, 201, 2)], k=1)[0]
+            radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]  # noqa: C416
+            eps = random.sample([i / 1000.0 for i in range(1, 101, 2)], k=1)[0]
+            scale_factor = random.sample([i / 10.0 for i in range(10, 181, 5)], k=1)[0]
+
+            if random.random() > 0.5:
+                np_img = self.apply_gaussian_blur(np_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)
+
+            if random.random() > 0.5:
+                np_img = self.apply_guided_filter(np_img, radius, eps, int(scale_factor))
Collaborator: Why non-deterministic? Should we make these two different modes?

Collaborator (Author): That is also how this particular model seems to prefer it: https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0

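As a hedged sketch of the reviewer's suggestion (the mode names and defaults here are hypothetical, not part of this PR), the two operations could be exposed as separate, deterministic modes:

```python
import cv2
import numpy as np

# Hypothetical deterministic variant: instead of sampling blur strength, radius,
# eps and scale at random, expose each operation as its own mode with fixed,
# user-settable parameters.
def preprocess_deterministic(np_img: np.ndarray, mode: str, strength: float = 5.0) -> np.ndarray:
    if mode == "gaussian_blur":
        ksize = int(strength) | 1  # kernel size must be odd
        return cv2.GaussianBlur(np_img, (ksize, ksize), sigmaX=strength / 2)
    if mode == "guided_filter":
        # requires opencv-contrib-python for the ximgproc module;
        # eps is in squared intensity units of the input dtype
        return cv2.ximgproc.guidedFilter(np_img, np_img, int(strength), 0.01)
    return np_img
```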

+            np_img = cv2.resize(
+                np_img, (int(resize_w / scale_factor), int(resize_h / scale_factor)), interpolation=cv2.INTER_AREA
+            )
+            np_img = cv2.resize(np_img, (resize_w, resize_h), interpolation=cv2.INTER_CUBIC)
+
+        # np_img = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
+
+        return np_img

     def run_processor(self, image: Image.Image) -> Image.Image:
         np_img = np.array(image, dtype=np.uint8)
-        processed_np_image = self.tile_resample(
-            np_img,
-            # res=self.tile_size,
-            down_sampling_rate=self.down_sampling_rate,
-        )
+        processed_np_image = self.tile_resample(np_img)
         processed_image = Image.fromarray(processed_np_image)
         return processed_image

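For intuition about the new sizing logic: every non-regular mode rescales toward roughly one megapixel, and "super" additionally snaps each side down to a multiple of 48. A small standalone sketch of that arithmetic (hypothetical input size):

```python
import numpy as np

# "super" mode sizing from tile_resample above: scale toward ~1024x1024 total
# pixels, then snap each side down to the nearest multiple of 48.
width, height = 1216, 832
ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
resize_w, resize_h = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
print(resize_w, resize_h)  # 1200 816 -- both divisible by 48
```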
283 changes: 283 additions & 0 deletions invokeai/backend/image_util/fast_guided_filter/fast_guided_filter.py
@@ -0,0 +1,283 @@
# ruff: noqa: E741
Collaborator: This file appears to have been copied from somewhere. Can you link to the original?

Also, why use this implementation over cv2.ximgproc.guidedFilter?

Collaborator (Author): The PR was made to support this new Tile model: https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0

This model uses a custom version of the guided filter which they claim is faster. I have not benchmarked it myself; functionally, using the built-in guidedFilter from cv2 should also work just fine, imo.

Collaborator: Seems like it would be easy to test for both speed and behavior. I think that's worth doing so we can decide whether it's worth bringing in this file.

If we do decide that this file is better than the cv2 version, then we should:

  • Explain why we are using this over cv2 at the top of this file.
  • Permalink to the source that this file was copied from.
  • Remove import cv2.ximgproc from the invocation file.

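A rough benchmark along the lines suggested above might look like this (a sketch, not part of the PR; note that FastGuidedFilter normalizes inputs to [0, 1] while cv2 works on the raw dtype, so the eps values are not directly comparable between the two):

```python
import time

import cv2
import numpy as np

from invokeai.backend.image_util.fast_guided_filter.fast_guided_filter import FastGuidedFilter

img = np.random.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)
radius, eps, scale = 7, 0.01, 4

t0 = time.perf_counter()
out_fast = FastGuidedFilter(img, radius, eps, scale).filter(img)
t1 = time.perf_counter()
out_cv2 = cv2.ximgproc.guidedFilter(img, img, radius, eps)  # needs opencv-contrib-python
t2 = time.perf_counter()

print(f"FastGuidedFilter: {t1 - t0:.3f}s  cv2.ximgproc: {t2 - t1:.3f}s")
print("max abs diff:", np.abs(out_fast.astype(int) - out_cv2.astype(int)).max())
```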
# -*- coding: utf-8 -*-
## @package guided_filter.core.filters
#
# Implementation of guided filter.
# * GuidedFilter: Original guided filter.
# * FastGuidedFilter: Fast version of the guided filter.
# @author tody
# @date 2015/08/26


import cv2
import numpy as np


## Convert image into float32 type.
def to32F(img):
    if img.dtype == np.float32:
        return img
    return (1.0 / 255.0) * np.float32(img)


## Convert image into uint8 type.
def to8U(img):
    if img.dtype == np.uint8:
        return img
    return np.uint8(np.clip(255.0 * img, 0, 255))  # clip before casting so out-of-range values saturate instead of wrapping


## Return if the input image is gray or not.
def _isGray(I):
    return len(I.shape) == 2


## Return down sampled image.
# @param scale (w/s, h/s) image will be created.
# @param shape I.shape[:2]=(h, w). numpy friendly size parameter.
def _downSample(I, scale=4, shape=None):
    if shape is not None:
        h, w = shape
        return cv2.resize(I, (w, h), interpolation=cv2.INTER_NEAREST)

    h, w = I.shape[:2]
    return cv2.resize(I, (int(w / scale), int(h / scale)), interpolation=cv2.INTER_NEAREST)


## Return up sampled image.
# @param scale (w*s, h*s) image will be created.
# @param shape I.shape[:2]=(h, w). numpy friendly size parameter.
def _upSample(I, scale=2, shape=None):
    if shape is not None:
        h, w = shape
        return cv2.resize(I, (w, h), interpolation=cv2.INTER_LINEAR)

    h, w = I.shape[:2]
    return cv2.resize(I, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR)


## Fast guided filter.
class FastGuidedFilter:
    ## Constructor.
    # @param I Input guidance image. Color or gray.
    # @param radius Radius of Guided Filter.
    # @param epsilon Regularization term of Guided Filter.
    # @param scale Down sampled scale.
    def __init__(self, I, radius=5, epsilon=0.4, scale=4):
        I_32F = to32F(I)
        self._I = I_32F
        h, w = I.shape[:2]

        I_sub = _downSample(I_32F, scale)

        self._I_sub = I_sub
        radius = int(radius / scale)

        if _isGray(I):
            self._guided_filter = GuidedFilterGray(I_sub, radius, epsilon)
        else:
            self._guided_filter = GuidedFilterColor(I_sub, radius, epsilon)

    ## Apply filter for the input image.
    # @param p Input image for the filtering.
    def filter(self, p):
        p_32F = to32F(p)
        shape_original = p.shape[:2]

        p_sub = _downSample(p_32F, shape=self._I_sub.shape[:2])

        if _isGray(p_sub):
            return self._filterGray(p_sub, shape_original)

        cs = p.shape[2]
        q = np.array(p_32F)

        for ci in range(cs):
            q[:, :, ci] = self._filterGray(p_sub[:, :, ci], shape_original)
        return to8U(q)

    def _filterGray(self, p_sub, shape_original):
        # Coefficients are computed on the down-sampled image, then up-sampled
        # and applied to the full-resolution guidance image.
        ab_sub = self._guided_filter._computeCoefficients(p_sub)
        ab = [_upSample(abi, shape=shape_original) for abi in ab_sub]
        return self._guided_filter._computeOutput(ab, self._I)


## Guided filter.
class GuidedFilter:
    ## Constructor.
    # @param I Input guidance image. Color or gray.
    # @param radius Radius of Guided Filter.
    # @param epsilon Regularization term of Guided Filter.
    def __init__(self, I, radius=5, epsilon=0.4):
        I_32F = to32F(I)

        if _isGray(I):
            self._guided_filter = GuidedFilterGray(I_32F, radius, epsilon)
        else:
            self._guided_filter = GuidedFilterColor(I_32F, radius, epsilon)

    ## Apply filter for the input image.
    # @param p Input image for the filtering.
    def filter(self, p):
        return to8U(self._guided_filter.filter(p))


## Common parts of the guided filter.
#
# Used by the GuidedFilterGray and GuidedFilterColor classes.
# Built on their _computeCoefficients and _computeOutput methods,
# GuidedFilterCommon.filter computes the filtered image for color and gray inputs.
class GuidedFilterCommon:
    def __init__(self, guided_filter):
        self._guided_filter = guided_filter

    ## Apply filter for the input image.
    # @param p Input image for the filtering.
    def filter(self, p):
        p_32F = to32F(p)
        if _isGray(p_32F):
            return self._filterGray(p_32F)

        cs = p.shape[2]
        q = np.array(p_32F)

        # Filter each color channel independently against the shared guidance image.
        for ci in range(cs):
            q[:, :, ci] = self._filterGray(p_32F[:, :, ci])
        return q

    def _filterGray(self, p):
        ab = self._guided_filter._computeCoefficients(p)
        return self._guided_filter._computeOutput(ab, self._guided_filter._I)


## Guided filter for gray guidance image.
class GuidedFilterGray:
    # @param I Input gray guidance image.
    # @param radius Radius of Guided Filter.
    # @param epsilon Regularization term of Guided Filter.
    def __init__(self, I, radius=5, epsilon=0.4):
        self._radius = 2 * radius + 1
        self._epsilon = epsilon
        self._I = to32F(I)
        self._initFilter()
        self._filter_common = GuidedFilterCommon(self)

    ## Apply filter for the input image.
    # @param p Input image for the filtering.
    def filter(self, p):
        return self._filter_common.filter(p)

    def _initFilter(self):
        I = self._I
        r = self._radius
        self._I_mean = cv2.blur(I, (r, r))
        I_mean_sq = cv2.blur(I**2, (r, r))
        self._I_var = I_mean_sq - self._I_mean**2

    # Per-window linear coefficients of the guided filter (He et al. 2010):
    #   a = cov(I, p) / (var(I) + epsilon),  b = mean(p) - a * mean(I),
    # with a and b box-averaged before use in _computeOutput.
    def _computeCoefficients(self, p):
        r = self._radius
        p_mean = cv2.blur(p, (r, r))
        # cov(I, p) = mean(I * p) - mean(I) * mean(p)
        p_cov = cv2.blur(self._I * p, (r, r)) - self._I_mean * p_mean
        a = p_cov / (self._I_var + self._epsilon)
        b = p_mean - a * self._I_mean
        a_mean = cv2.blur(a, (r, r))
        b_mean = cv2.blur(b, (r, r))
        return a_mean, b_mean

    def _computeOutput(self, ab, I):
        a_mean, b_mean = ab
        return a_mean * I + b_mean


## Guided filter for color guidance image.
class GuidedFilterColor:
    # @param I Input color guidance image.
    # @param radius Radius of Guided Filter.
    # @param epsilon Regularization term of Guided Filter.
    def __init__(self, I, radius=5, epsilon=0.2):
        self._radius = 2 * radius + 1
        self._epsilon = epsilon
        self._I = to32F(I)
        self._initFilter()
        self._filter_common = GuidedFilterCommon(self)

    ## Apply filter for the input image.
    # @param p Input image for the filtering.
    def filter(self, p):
        return self._filter_common.filter(p)

    def _initFilter(self):
        I = self._I
        r = self._radius
        eps = self._epsilon

        Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2]

        self._Ir_mean = cv2.blur(Ir, (r, r))
        self._Ig_mean = cv2.blur(Ig, (r, r))
        self._Ib_mean = cv2.blur(Ib, (r, r))

        # Per-window color covariance matrix, regularized on the diagonal (Sigma + eps * Id).
        Irr_var = cv2.blur(Ir**2, (r, r)) - self._Ir_mean**2 + eps
        Irg_var = cv2.blur(Ir * Ig, (r, r)) - self._Ir_mean * self._Ig_mean
        Irb_var = cv2.blur(Ir * Ib, (r, r)) - self._Ir_mean * self._Ib_mean
        Igg_var = cv2.blur(Ig * Ig, (r, r)) - self._Ig_mean * self._Ig_mean + eps
        Igb_var = cv2.blur(Ig * Ib, (r, r)) - self._Ig_mean * self._Ib_mean
        Ibb_var = cv2.blur(Ib * Ib, (r, r)) - self._Ib_mean * self._Ib_mean + eps

        # Invert the symmetric 3x3 matrix via its cofactors; I_cov below is the
        # determinant from the cofactor expansion along the first row.
        Irr_inv = Igg_var * Ibb_var - Igb_var * Igb_var
        Irg_inv = Igb_var * Irb_var - Irg_var * Ibb_var
        Irb_inv = Irg_var * Igb_var - Igg_var * Irb_var
        Igg_inv = Irr_var * Ibb_var - Irb_var * Irb_var
        Igb_inv = Irb_var * Irg_var - Irr_var * Igb_var
        Ibb_inv = Irr_var * Igg_var - Irg_var * Irg_var

        I_cov = Irr_inv * Irr_var + Irg_inv * Irg_var + Irb_inv * Irb_var
        Irr_inv /= I_cov
        Irg_inv /= I_cov
        Irb_inv /= I_cov
        Igg_inv /= I_cov
        Igb_inv /= I_cov
        Ibb_inv /= I_cov

        self._Irr_inv = Irr_inv
        self._Irg_inv = Irg_inv
        self._Irb_inv = Irb_inv
        self._Igg_inv = Igg_inv
        self._Igb_inv = Igb_inv
        self._Ibb_inv = Ibb_inv

    def _computeCoefficients(self, p):
        r = self._radius
        I = self._I
        Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2]

        p_mean = cv2.blur(p, (r, r))

        Ipr_mean = cv2.blur(Ir * p, (r, r))
        Ipg_mean = cv2.blur(Ig * p, (r, r))
        Ipb_mean = cv2.blur(Ib * p, (r, r))

        # Per-channel covariance between guidance and input: cov(I_c, p).
        Ipr_cov = Ipr_mean - self._Ir_mean * p_mean
        Ipg_cov = Ipg_mean - self._Ig_mean * p_mean
        Ipb_cov = Ipb_mean - self._Ib_mean * p_mean

        # a = (Sigma + eps * Id)^-1 * cov(I, p),  b = mean(p) - a . mean(I)
        ar = self._Irr_inv * Ipr_cov + self._Irg_inv * Ipg_cov + self._Irb_inv * Ipb_cov
        ag = self._Irg_inv * Ipr_cov + self._Igg_inv * Ipg_cov + self._Igb_inv * Ipb_cov
        ab = self._Irb_inv * Ipr_cov + self._Igb_inv * Ipg_cov + self._Ibb_inv * Ipb_cov
        b = p_mean - ar * self._Ir_mean - ag * self._Ig_mean - ab * self._Ib_mean

        ar_mean = cv2.blur(ar, (r, r))
        ag_mean = cv2.blur(ag, (r, r))
        ab_mean = cv2.blur(ab, (r, r))
        b_mean = cv2.blur(b, (r, r))

        return ar_mean, ag_mean, ab_mean, b_mean

    def _computeOutput(self, ab, I):
        ar_mean, ag_mean, ab_mean, b_mean = ab

        Ir, Ig, Ib = I[:, :, 0], I[:, :, 1], I[:, :, 2]

        q = ar_mean * Ir + ag_mean * Ig + ab_mean * Ib + b_mean

        return q