Skip to content

Commit fa527c2

Browse files
authored
feat: support for image generation (#980) (#1326)
## Summary Briefly explain the purpose of this PR and the problem it solves. ## Changes - What was changed and why - Any notable design decisions or trade-offs ## Type of change - [ ] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [ ] UI (Next.js) - [ ] Docs ## How to test Describe the steps to validate this change. Include commands and expected outcomes. ```sh # Core/Transports go version go test ./... # UI cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` If adding new configs or environment variables, document them here. ## Screenshots/Recordings If UI changes, add before/after screenshots or short clips. ## Breaking changes - [ ] Yes - [ ] No If yes, describe impact and migration instructions. ## Related issues Link related issues and discussions. Example: Closes #123 ## Security considerations Note any security implications (auth, secrets, PII, sandboxing, etc.). ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable
1 parent 5adf6f6 commit fa527c2

File tree

128 files changed

+8541
-493
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+8541
-493
lines changed

core/bifrost.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,94 @@ func (bifrost *Bifrost) TranscriptionStreamRequest(ctx *schemas.BifrostContext,
10051005
return bifrost.handleStreamRequest(ctx, bifrostReq)
10061006
}
10071007

1008+
// ImageGenerationRequest sends an image generation request to the specified provider.
//
// It returns a BifrostError without dispatching anything when req is nil, or
// when req.Input is nil or carries an empty Prompt. Otherwise it wraps req in a
// BifrostRequest tagged schemas.ImageGenerationRequest and passes it to
// bifrost.handleRequest. An error from handleRequest is returned unchanged; a
// nil response, or a response whose ImageGenerationResponse is nil, is reported
// as "received nil response from provider".
1009+
func (bifrost *Bifrost) ImageGenerationRequest(ctx *schemas.BifrostContext,
1010+
req *schemas.BifrostImageGenerationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
1011+
// A nil request has no provider or model to report, so only RequestType is set.
if req == nil {
1012+
return nil, &schemas.BifrostError{
1013+
IsBifrostError: false,
1014+
Error: &schemas.ErrorField{
1015+
Message: "image generation request is nil",
1016+
},
1017+
ExtraFields: schemas.BifrostErrorExtraFields{
1018+
RequestType: schemas.ImageGenerationRequest,
1019+
},
1020+
}
1021+
}
1022+
// Validate the prompt up front; this runs before a BifrostRequest is obtained.
if req.Input == nil || req.Input.Prompt == "" {
1023+
return nil, &schemas.BifrostError{
1024+
IsBifrostError: false,
1025+
Error: &schemas.ErrorField{
1026+
Message: "prompt not provided for image generation request",
1027+
},
1028+
ExtraFields: schemas.BifrostErrorExtraFields{
1029+
RequestType: schemas.ImageGenerationRequest,
1030+
Provider: req.Provider,
1031+
ModelRequested: req.Model,
1032+
},
1033+
}
1034+
}
1035+
1036+
// NOTE(review): getBifrostRequest is defined outside this view — presumably it
// hands out a reusable request object; confirm before relying on its lifetime.
bifrostReq := bifrost.getBifrostRequest()
1037+
bifrostReq.RequestType = schemas.ImageGenerationRequest
1038+
bifrostReq.ImageGenerationRequest = req
1039+
1040+
response, err := bifrost.handleRequest(ctx, bifrostReq)
1041+
if err != nil {
1042+
return nil, err
1043+
}
1044+
// Guard the field access in the final return: handleRequest reported no error
// but may still have produced no image payload.
if response == nil || response.ImageGenerationResponse == nil {
1045+
return nil, &schemas.BifrostError{
1046+
IsBifrostError: false,
1047+
Error: &schemas.ErrorField{
1048+
Message: "received nil response from provider",
1049+
},
1050+
ExtraFields: schemas.BifrostErrorExtraFields{
1051+
RequestType: schemas.ImageGenerationRequest,
1052+
Provider: req.Provider,
1053+
ModelRequested: req.Model,
1054+
},
1055+
}
1056+
}
1057+
1058+
return response.ImageGenerationResponse, nil
1059+
}
1060+
1061+
// ImageGenerationStreamRequest sends an image generation stream request to the specified provider.
//
// It returns a BifrostError without dispatching anything when req is nil, or
// when req.Input is nil or carries an empty Prompt. Otherwise it wraps req in a
// BifrostRequest tagged schemas.ImageGenerationStreamRequest and returns
// whatever bifrost.handleStreamRequest yields (a stream channel or an error).
1062+
func (bifrost *Bifrost) ImageGenerationStreamRequest(ctx *schemas.BifrostContext,
1063+
req *schemas.BifrostImageGenerationRequest) (chan *schemas.BifrostStream, *schemas.BifrostError) {
1064+
// A nil request has no provider or model to report, so only RequestType is set.
if req == nil {
1065+
return nil, &schemas.BifrostError{
1066+
IsBifrostError: false,
1067+
Error: &schemas.ErrorField{
1068+
Message: "image generation stream request is nil",
1069+
},
1070+
ExtraFields: schemas.BifrostErrorExtraFields{
1071+
RequestType: schemas.ImageGenerationStreamRequest,
1072+
},
1073+
}
1074+
}
1075+
// Validate the prompt up front; this runs before a BifrostRequest is obtained.
if req.Input == nil || req.Input.Prompt == "" {
1076+
return nil, &schemas.BifrostError{
1077+
IsBifrostError: false,
1078+
Error: &schemas.ErrorField{
1079+
Message: "prompt not provided for image generation stream request",
1080+
},
1081+
ExtraFields: schemas.BifrostErrorExtraFields{
1082+
RequestType: schemas.ImageGenerationStreamRequest,
1083+
Provider: req.Provider,
1084+
ModelRequested: req.Model,
1085+
},
1086+
}
1087+
}
1088+
1089+
bifrostReq := bifrost.getBifrostRequest()
1090+
// The streaming path carries its payload in the same ImageGenerationRequest
// field as the non-streaming path; only RequestType differs.
bifrostReq.RequestType = schemas.ImageGenerationStreamRequest
1091+
bifrostReq.ImageGenerationRequest = req
1092+
1093+
return bifrost.handleStreamRequest(ctx, bifrostReq)
1094+
}
1095+
10081096
// BatchCreateRequest creates a new batch job for asynchronous processing.
10091097
func (bifrost *Bifrost) BatchCreateRequest(ctx *schemas.BifrostContext, req *schemas.BifrostBatchCreateRequest) (*schemas.BifrostBatchCreateResponse, *schemas.BifrostError) {
10101098
if req == nil {
@@ -2320,6 +2408,12 @@ func (bifrost *Bifrost) prepareFallbackRequest(req *schemas.BifrostRequest, fall
23202408
tmp.Model = fallback.Model
23212409
fallbackReq.TranscriptionRequest = &tmp
23222410
}
2411+
if req.ImageGenerationRequest != nil {
2412+
tmp := *req.ImageGenerationRequest
2413+
tmp.Provider = fallback.Provider
2414+
tmp.Model = fallback.Model
2415+
fallbackReq.ImageGenerationRequest = &tmp
2416+
}
23232417

23242418
return &fallbackReq
23252419
}
@@ -2773,6 +2867,9 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem
27732867
if streamMsg.BifrostTranscriptionStreamResponse != nil {
27742868
bifrostResponse.TranscriptionStreamResponse = streamMsg.BifrostTranscriptionStreamResponse
27752869
}
2870+
if streamMsg.BifrostImageGenerationStreamResponse != nil {
2871+
bifrostResponse.ImageGenerationStreamResponse = streamMsg.BifrostImageGenerationStreamResponse
2872+
}
27762873

27772874
// Run post hooks on the stream message
27782875
processedResponse, processedError := pipelinePostHookRunner(ctx, bifrostResponse, streamMsg.BifrostError)
@@ -2784,6 +2881,7 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem
27842881
streamResponse.BifrostResponsesStreamResponse = processedResponse.ResponsesStreamResponse
27852882
streamResponse.BifrostSpeechStreamResponse = processedResponse.SpeechStreamResponse
27862883
streamResponse.BifrostTranscriptionStreamResponse = processedResponse.TranscriptionStreamResponse
2884+
streamResponse.BifrostImageGenerationStreamResponse = processedResponse.ImageGenerationStreamResponse
27872885
}
27882886
if processedError != nil {
27892887
streamResponse.BifrostError = processedError
@@ -3260,6 +3358,12 @@ func (bifrost *Bifrost) handleProviderRequest(provider schemas.Provider, req *Ch
32603358
return nil, bifrostError
32613359
}
32623360
response.TranscriptionResponse = transcriptionResponse
3361+
case schemas.ImageGenerationRequest:
3362+
imageResponse, bifrostError := provider.ImageGeneration(req.Context, key, req.BifrostRequest.ImageGenerationRequest)
3363+
if bifrostError != nil {
3364+
return nil, bifrostError
3365+
}
3366+
response.ImageGenerationResponse = imageResponse
32633367
case schemas.FileUploadRequest:
32643368
fileUploadResponse, bifrostError := provider.FileUpload(req.Context, key, req.BifrostRequest.FileUploadRequest)
32653369
if bifrostError != nil {
@@ -3350,6 +3454,8 @@ func (bifrost *Bifrost) handleProviderStreamRequest(provider schemas.Provider, r
33503454
return provider.SpeechStream(req.Context, postHookRunner, key, req.BifrostRequest.SpeechRequest)
33513455
case schemas.TranscriptionStreamRequest:
33523456
return provider.TranscriptionStream(req.Context, postHookRunner, key, req.BifrostRequest.TranscriptionRequest)
3457+
case schemas.ImageGenerationStreamRequest:
3458+
return provider.ImageGenerationStream(req.Context, postHookRunner, key, req.BifrostRequest.ImageGenerationRequest)
33533459
default:
33543460
_, model, _ := req.BifrostRequest.GetRequestFields()
33553461
return nil, &schemas.BifrostError{
@@ -3665,6 +3771,7 @@ func resetBifrostRequest(req *schemas.BifrostRequest) {
36653771
req.EmbeddingRequest = nil
36663772
req.SpeechRequest = nil
36673773
req.TranscriptionRequest = nil
3774+
req.ImageGenerationRequest = nil
36683775
req.FileUploadRequest = nil
36693776
req.FileListRequest = nil
36703777
req.FileRetrieveRequest = nil

core/changelog.md

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1 @@
1-
- chore: added case-insensitive helper methods for header and query parameter lookups in HTTPRequest
2-
- feat: added support for path parameter lookups in HTTPRequest
3-
- fix: missing request type in error response for anthropic SDK integration
4-
- feat: add raw request data to bifrost error responses
5-
- fix: add support for AdditionalProperties structures (both boolean and object types)
6-
- fix: improve thought signature handling in gemini for function calls
7-
- fix: enhance citations structure to support multiple citation types
8-
- fix: anthropic streaming events through integration
1+
- feat: added image generation request and response support

core/internal/testutil/account.go

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ type TestScenarios struct {
4444
Reasoning bool // Reasoning/thinking functionality via Responses API
4545
PromptCaching bool // Prompt caching functionality
4646
ListModels bool // List available models functionality
47+
ImageGeneration bool // Image generation functionality
48+
ImageGenerationStream bool // Streaming image generation functionality
4749
BatchCreate bool // Batch API create functionality
4850
BatchList bool // Batch API list functionality
4951
BatchRetrieve bool // Batch API retrieve functionality
@@ -80,6 +82,8 @@ type ComprehensiveTestConfig struct {
8082
SpeechSynthesisFallbacks []schemas.Fallback // for speech synthesis tests
8183
EmbeddingFallbacks []schemas.Fallback // for embedding tests
8284
SkipReason string // Reason to skip certain tests
85+
ImageGenerationModel string // Model for image generation
86+
ImageGenerationFallbacks []schemas.Fallback // Fallbacks for image generation
8387
ExternalTTSProvider schemas.ModelProvider // External TTS provider to use for testing
8488
ExternalTTSModel string // External TTS model to use for testing
8589
BatchExtraParams map[string]interface{} // Extra params for batch operations (e.g., role_arn, output_s3_uri for Bedrock)
@@ -224,6 +228,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context,
224228
"gpt-4o-backup": "gpt-4o-3",
225229
"claude-opus-4-5": "claude-opus-4-5",
226230
"o1": "o1",
231+
"gpt-image-1": "gpt-image-1",
227232
"text-embedding-ada-002": "text-embedding-ada-002",
228233
},
229234
ClientID: bifrost.Ptr(os.Getenv("AZURE_CLIENT_ID")),
@@ -251,7 +256,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context,
251256
return []schemas.Key{
252257
{
253258
Value: os.Getenv("VERTEX_API_KEY"),
254-
Models: []string{"text-multilingual-embedding-002", "google/gemini-2.0-flash-001"},
259+
Models: []string{"text-multilingual-embedding-002", "google/gemini-2.0-flash-001", "gemini-2.5-flash-image", "imagen-4.0-generate-001"},
255260
Weight: 1.0,
256261
VertexKeyConfig: &schemas.VertexKeyConfig{
257262
ProjectID: os.Getenv("VERTEX_PROJECT_ID"),
@@ -670,6 +675,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
670675
PromptCachingModel: "gpt-4.1",
671676
TranscriptionModel: "whisper-1",
672677
SpeechSynthesisModel: "tts-1",
678+
ImageGenerationModel: "gpt-image-1",
673679
ChatAudioModel: "gpt-4o-mini-audio-preview",
674680
Scenarios: TestScenarios{
675681
TextCompletion: false, // Not supported
@@ -689,6 +695,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
689695
SpeechSynthesisStream: true, // OpenAI supports streaming TTS
690696
Transcription: true, // OpenAI supports STT with Whisper
691697
TranscriptionStream: true, // OpenAI supports streaming STT
698+
ImageGeneration: true, // OpenAI supports image generation (gpt-image-1, DALL-E)
699+
ImageGenerationStream: true, // OpenAI supports streaming image generation
692700
Embedding: true,
693701
Reasoning: true, // OpenAI supports reasoning via o1 models
694702
ListModels: true,
@@ -731,6 +739,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
731739
Transcription: false, // Not supported
732740
TranscriptionStream: false, // Not supported
733741
Embedding: false,
742+
ImageGeneration: false,
743+
ImageGenerationStream: false,
734744
ListModels: true,
735745
BatchCreate: true, // Anthropic supports batch API
736746
BatchList: true, // Anthropic supports batch API
@@ -765,6 +775,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
765775
Transcription: false, // Not supported
766776
TranscriptionStream: false, // Not supported
767777
Embedding: true,
778+
ImageGeneration: false,
779+
ImageGenerationStream: false,
768780
ListModels: true,
769781
BatchCreate: true, // Bedrock supports batch via Model Invocation Jobs (requires S3 config)
770782
BatchList: true, // Bedrock supports listing batch jobs
@@ -798,6 +810,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
798810
ImageBase64: false, // Check if supported
799811
MultipleImages: false, // Check if supported
800812
CompleteEnd2End: true,
813+
ImageGeneration: false,
814+
ImageGenerationStream: false,
801815
SpeechSynthesis: false, // Not supported
802816
SpeechSynthesisStream: false, // Not supported
803817
Transcription: false, // Not supported
@@ -816,6 +830,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
816830
ChatAudioModel: "gpt-4o-mini-audio-preview",
817831
TranscriptionModel: "whisper-1",
818832
SpeechSynthesisModel: "gpt-4o-mini-tts",
833+
ImageGenerationModel: "gpt-image-1",
819834
Scenarios: TestScenarios{
820835
TextCompletion: false, // Not supported
821836
SimpleChat: true,
@@ -834,6 +849,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
834849
Transcription: true, // Supported via whisper-1
835850
TranscriptionStream: false, // Not properly supported yet by Azure
836851
Embedding: true,
852+
ImageGeneration: true,
853+
ImageGenerationStream: true,
837854
ListModels: true,
838855
BatchCreate: true, // Azure supports batch API
839856
BatchList: true, // Azure supports batch API
@@ -852,9 +869,10 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
852869
},
853870
},
854871
{
855-
Provider: schemas.Vertex,
856-
ChatModel: "gemini-pro",
857-
TextModel: "", // Vertex focuses on chat
872+
Provider: schemas.Vertex,
873+
ChatModel: "gemini-pro",
874+
TextModel: "", // Vertex focuses on chat
875+
ImageGenerationModel: "imagen-4.0-generate-001",
858876
Scenarios: TestScenarios{
859877
TextCompletion: false, // Not typical
860878
SimpleChat: true,
@@ -868,6 +886,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
868886
ImageBase64: true,
869887
MultipleImages: true,
870888
CompleteEnd2End: true,
889+
ImageGeneration: true,
890+
ImageGenerationStream: false,
871891
SpeechSynthesis: false, // Not supported
872892
SpeechSynthesisStream: false, // Not supported
873893
Transcription: false, // Not supported
@@ -901,6 +921,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
901921
Transcription: true, // Supported via voxtral-mini-latest
902922
TranscriptionStream: true, // Supported via voxtral-mini-latest
903923
Embedding: true,
924+
ImageGeneration: false,
925+
ImageGenerationStream: false,
904926
ListModels: true,
905927
},
906928
Fallbacks: []schemas.Fallback{
@@ -929,6 +951,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
929951
Transcription: false, // Not supported
930952
TranscriptionStream: false, // Not supported
931953
Embedding: false,
954+
ImageGeneration: false,
955+
ImageGenerationStream: false,
932956
ListModels: true,
933957
},
934958
Fallbacks: []schemas.Fallback{
@@ -957,6 +981,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
957981
Transcription: false, // Not supported
958982
TranscriptionStream: false, // Not supported
959983
Embedding: false,
984+
ImageGeneration: false,
985+
ImageGenerationStream: false,
960986
ListModels: true,
961987
},
962988
Fallbacks: []schemas.Fallback{
@@ -998,6 +1024,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
9981024
TranscriptionModel: "gemini-2.5-flash",
9991025
SpeechSynthesisModel: "gemini-2.5-flash-preview-tts",
10001026
EmbeddingModel: "text-embedding-004",
1027+
ImageGenerationModel: "imagen-4.0-generate-001",
10011028
Scenarios: TestScenarios{
10021029
TextCompletion: false, // Not supported
10031030
SimpleChat: true,
@@ -1016,6 +1043,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10161043
Transcription: true,
10171044
TranscriptionStream: true,
10181045
Embedding: true,
1046+
ImageGeneration: true,
1047+
ImageGenerationStream: false,
10191048
ListModels: true,
10201049
BatchCreate: true,
10211050
BatchList: true,
@@ -1049,6 +1078,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10491078
ImageBase64: true,
10501079
MultipleImages: true,
10511080
CompleteEnd2End: true,
1081+
ImageGeneration: false,
1082+
ImageGenerationStream: false,
10521083
SpeechSynthesis: false,
10531084
SpeechSynthesisStream: false,
10541085
Transcription: false,
@@ -1067,6 +1098,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10671098
EmbeddingModel: "sambanova/intfloat/e5-mistral-7b-instruct",
10681099
TranscriptionModel: "fal-ai/openai/whisper-large-v3",
10691100
SpeechSynthesisModel: "fal-ai/hexgrad/Kokoro-82M",
1101+
ImageGenerationModel: "fal-ai/fal-ai/flux-2",
10701102
Scenarios: TestScenarios{
10711103
TextCompletion: false,
10721104
TextCompletionStream: false,
@@ -1083,6 +1115,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10831115
MultipleImages: true,
10841116
CompleteEnd2End: true,
10851117
Embedding: true,
1118+
ImageGeneration: true,
1119+
ImageGenerationStream: true,
10861120
Transcription: true,
10871121
TranscriptionStream: false,
10881122
SpeechSynthesis: true,
@@ -1095,9 +1129,10 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10951129
},
10961130
},
10971131
{
1098-
Provider: schemas.XAI,
1099-
ChatModel: "grok-4-0709",
1100-
TextModel: "", // XAI focuses on chat
1132+
Provider: schemas.XAI,
1133+
ChatModel: "grok-4-0709",
1134+
TextModel: "", // XAI focuses on chat
1135+
ImageGenerationModel: "grok-2-image",
11011136
Scenarios: TestScenarios{
11021137
TextCompletion: false, // Not typical
11031138
SimpleChat: true,
@@ -1117,6 +1152,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
11171152
TranscriptionStream: false, // Not supported
11181153
Embedding: false, // Not supported
11191154
ListModels: true,
1155+
ImageGeneration: true,
1156+
ImageGenerationStream: false,
11201157
},
11211158
},
11221159
}

0 commit comments

Comments
 (0)