Skip to content

Commit 8387956

Browse files
Xiake Sun and ilya-lavrenov authored
Add SD3 LoRA Adapter Support (openvinotoolkit#2187)
**Details:**
- Add SD3 LoRA Adapter Support for Text2ImagePipeline
- Verified model:
  - Base model: https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers
  - LoRA: https://civitai.com/models/515022/sd3-first-anime-lora-test
- Test step:
  - Model conversion: `optimum-cli export openvino --model stabilityai/stable-diffusion-3-medium-diffusers --weight-format fp16 stable-diffusion-3-medium-diffusers-ov/FP16 --task text-to-image --trust-remote-code`
  - Model inference: `build/samples/cpp/image_generation/lora_text2image stable-diffusion-3-medium-diffusers-ov/FP16/ "A woman with black hair, green eyes, leaking flowing light, in front of a black background, she wears a suit, necklace, gold earrings, only eyes have color" SD3-First_Anime_Lora_test/Highstep/bl_128_1600.safetensors 0.7`

**Tickets:** CVS-156874

---------

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
1 parent 2e63481 commit 8387956

9 files changed

Lines changed: 50 additions & 11 deletions

SUPPORTED_MODELS.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
263263
<td>Supported</td>
264264
<td>Supported</td>
265265
<td>Supported</td>
266-
<td>Not supported</td>
266+
<td>Partially supported</td>
267267
<td>
268268
<ul>
269269
<li><a href="https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers"><code>stabilityai/stable-diffusion-3-medium-diffusers</code></a></li>

src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@
1111
#include "openvino/runtime/infer_request.hpp"
1212
#include "openvino/runtime/properties.hpp"
1313
#include "openvino/runtime/tensor.hpp"
14-
1514
#include "openvino/genai/visibility.hpp"
15+
#include "openvino/genai/lora_adapter.hpp"
1616

1717
namespace ov {
1818
namespace genai {
@@ -75,6 +75,8 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {
7575
return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
7676
}
7777

78+
void set_adapters(const std::optional<AdapterConfig>& adapters);
79+
7880
void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);
7981

8082
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
@@ -87,6 +89,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {
8789
ov::InferRequest m_request;
8890
std::shared_ptr<ov::Model> m_model;
8991
size_t m_vae_scale_factor;
92+
AdapterController m_adapter_controller;
9093

9194
class InferenceDynamic;
9295
class InferenceStaticBS1;

src/cpp/src/image_generation/flux_pipeline.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -636,7 +636,7 @@ class FluxPipeline : public DiffusionPipeline {
636636
}
637637
}
638638

639-
// Returns non-empty updated adapters iff they are required to be updated
639+
// Returns non-empty updated adapters if they are required to be updated
640640
static std::optional<AdapterConfig> derived_adapters(const AdapterConfig& adapters) {
641641
return ov::genai::derived_adapters(adapters, flux_adapter_normalization);
642642
}

src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
#include "json_utils.hpp"
1111
#include "utils.hpp"
12+
#include "lora/helper.hpp"
1213

1314
namespace ov {
1415
namespace genai {
@@ -86,6 +87,12 @@ SD3Transformer2DModel& SD3Transformer2DModel::reshape(int batch_size,
8687

8788
SD3Transformer2DModel& SD3Transformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) {
8889
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
90+
std::optional<AdapterConfig> adapters;
91+
auto filtered_properties = extract_adapters_from_properties(properties, &adapters);
92+
if (adapters) {
93+
adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("transformer"));
94+
m_adapter_controller = AdapterController(m_model, *adapters, device);
95+
}
8996

9097
if (device.find("NPU") != std::string::npos) {
9198
m_impl = std::make_shared<SD3Transformer2DModel::InferenceStaticBS1>();
@@ -94,14 +101,21 @@ SD3Transformer2DModel& SD3Transformer2DModel::compile(const std::string& device,
94101
m_impl = std::make_shared<SD3Transformer2DModel::InferenceDynamic>();
95102
}
96103

97-
m_impl->compile(m_model, device, properties);
104+
m_impl->compile(m_model, device, *filtered_properties);
98105

99106
// release the original model
100107
m_model.reset();
101108

102109
return *this;
103110
}
104111

112+
// Applies LoRA adapters to the transformer by forwarding the configuration,
// together with m_adapter_controller, to the inference implementation (m_impl).
// Asserts that m_impl is set; when `adapters` holds no value the call does nothing.
void SD3Transformer2DModel::set_adapters(const std::optional<AdapterConfig>& adapters) {
113+
OPENVINO_ASSERT(m_impl, "Transformer model must be compiled first");
114+
if(adapters) {
115+
m_impl->set_adapters(m_adapter_controller, *adapters);
116+
}
117+
}
118+
105119
void SD3Transformer2DModel::set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) {
106120
OPENVINO_ASSERT(m_impl, "Transformer model must be compiled first");
107121
m_impl->set_hidden_states(tensor_name, encoder_hidden_states);

src/cpp/src/image_generation/models/sd3transformer_2d_inference.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ class SD3Transformer2DModel::Inference {
1414
public:
1515
virtual void compile(std::shared_ptr<ov::Model> model, const std::string& device, const ov::AnyMap& properties) = 0;
1616
virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0;
17+
virtual void set_adapters(AdapterController& m_adapter_controller, const AdapterConfig& adapters) = 0;
1718
virtual ov::Tensor infer(ov::Tensor latent_model_input, ov::Tensor timestep) = 0;
1819

1920
// utility function to resize model given optional dimensions.

src/cpp/src/image_generation/models/sd3transformer_2d_inference_dynamic.hpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,11 @@ class SD3Transformer2DModel::InferenceDynamic : public SD3Transformer2DModel::In
1919
m_request = compiled_model.create_infer_request();
2020
}
2121

22+
// Applies `adapters` to this implementation's infer request (m_request)
// through the supplied controller. Asserts that m_request is set.
// NOTE(review): the controller parameter uses the member-style `m_` prefix
// although it is a function parameter — consider renaming for clarity.
virtual void set_adapters(AdapterController& m_adapter_controller, const AdapterConfig& adapters) override {
23+
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first");
24+
m_adapter_controller.apply(m_request, adapters);
25+
}
26+
2227
virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) override {
2328
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first");
2429
m_request.set_tensor(tensor_name, encoder_hidden_states);

src/cpp/src/image_generation/models/sd3transformer_2d_inference_static_bs1.hpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,13 @@ class SD3Transformer2DModel::InferenceStaticBS1 : public SD3Transformer2DModel::
4545
}
4646
}
4747

48+
// Applies `adapters` to every infer request held in m_requests through the
// supplied controller, asserting that each request is set before applying.
// NOTE(review): the controller parameter and the loop variable both use the
// member-style `m_` prefix although neither is a member — consider renaming.
virtual void set_adapters(AdapterController& m_adapter_controller, const AdapterConfig& adapters) override {
49+
for (auto& m_request : m_requests) {
50+
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first");
51+
m_adapter_controller.apply(m_request, adapters);
52+
}
53+
}
54+
4855
virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) override {
4956
OPENVINO_ASSERT(m_native_batch_size && m_native_batch_size == m_requests.size(),
5057
"Transformer model must be compiled first");

src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "openvino/genai/image_generation/sd3_transformer_2d_model.hpp"
1414

1515
#include "utils.hpp"
16+
#include "lora/helper.hpp"
1617

1718
namespace {
1819

@@ -136,22 +137,19 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
136137
using utils::read_json_param;
137138

138139
set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"));
139-
140140
const std::string text_encoder = data["text_encoder"][1].get<std::string>();
141141
if (text_encoder == "CLIPTextModelWithProjection") {
142142
m_clip_text_encoder_1 =
143143
std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, properties);
144144
} else {
145145
OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
146146
}
147-
148147
const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>();
149148
if (text_encoder_2 == "CLIPTextModelWithProjection") {
150149
m_clip_text_encoder_2 = std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, properties);
151150
} else {
152151
OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type");
153152
}
154-
155153
const auto text_encoder_3_json = data["text_encoder_3"][1];
156154
if (!text_encoder_3_json.is_null()) {
157155
const std::string text_encoder_3 = text_encoder_3_json.get<std::string>();
@@ -161,7 +159,6 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
161159
OPENVINO_THROW("Unsupported '", text_encoder_3, "' text encoder type");
162160
}
163161
}
164-
165162
const std::string transformer = data["transformer"][1].get<std::string>();
166163
if (transformer == "SD3Transformer2DModel") {
167164
m_transformer = std::make_shared<SD3Transformer2DModel>(root_dir / "transformer", device, properties);
@@ -184,7 +181,6 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
184181

185182
// initialize generation config
186183
initialize_generation_config(data["_class_name"].get<std::string>());
187-
188184
update_adapters_from_properties(properties, m_generation_config.adapters);
189185
}
190186

@@ -456,7 +452,13 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
456452
}
457453

458454
void set_lora_adapters(std::optional<AdapterConfig> adapters) override {
459-
OPENVINO_THROW("LORA adapters are not implemented for Stable Diffusion 3 yet");
455+
if(adapters) {
456+
if(auto updated_adapters = derived_adapters(*adapters)) {
457+
adapters = updated_adapters;
458+
}
459+
// TODO: Add LoRA Adapter support for text encoders
460+
m_transformer->set_adapters(adapters);
461+
}
460462
}
461463

462464
ov::Tensor generate(const std::string& positive_prompt,
@@ -486,6 +488,8 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
486488

487489
check_inputs(generation_config, initial_image);
488490

491+
set_lora_adapters(generation_config.adapters);
492+
489493
// 3. Prepare timesteps
490494
m_scheduler->set_timesteps(generation_config.num_inference_steps, generation_config.strength);
491495

@@ -586,6 +590,12 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
586590
return m_perf_metrics;
587591
}
588592

593+
protected:
594+
// Returns non-empty updated adapters if they are required to be updated.
// Delegates to ov::genai::derived_adapters, passing flux_adapter_normalization
// as the normalization argument (the same one the Flux pipeline passes).
595+
static std::optional<AdapterConfig> derived_adapters(const AdapterConfig& adapters) {
596+
return ov::genai::derived_adapters(adapters, flux_adapter_normalization);
597+
}
598+
589599
private:
590600
size_t get_config_in_channels() const override {
591601
assert(m_transformer != nullptr);

src/cpp/src/lora/adapter.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1069,7 +1069,6 @@ struct AdapterControllerImpl {
10691069
}
10701070

10711071
auto state = infer_request.query_state();
1072-
10731072
// TODO: Forced to use variable_id instead of index to address the state tensors, require the same order for state as for variables from plugins
10741073

10751074
// Convert LoRAVarIDs to LoRAIndices to speedup search for state with a given name

0 commit comments

Comments
 (0)