Skip to content

Commit dc6f958

Browse files
committed
feat: support for image generation (#980)
This PR introduces image generation support to Bifrost and wires it into the existing gateway infrastructure. It adds core schemas, provider extensions, HTTP transport, streaming plumbing, cache integration, and initial UI support to address #950. Implementation is in progress. Completed functionality is marked, and remaining work will be completed in follow-up commits before marking this PR ready for review. Scoped Changes: - [x] New /v1/images/generations endpoint (OpenAI-compatible) - [x] Image generation via Chat Completion API (tool use pattern) - [x] Image generation via Responses API (native support) - [x] Streaming image delivery (base64 chunks) - [x] Semantic caching for image generation - [x] UI components for image rendering - [x] Provider implementations: OpenAI DALL-E, Azure DALL-E Rollout Plan: - [x] Phase 1: Core schema and provider implementation (OpenAI + Azure) - [x] Phase 2: HTTP transport and non-streaming endpoint - [x] Phase 3: Streaming support and accumulator - [x] Phase 4: Semantic cache integration (Base64 storage, 5min TTL) - [x] Phase 5: UI components and documentation - [ ] Bug fix - [x] Feature - [ ] Refactor - [x] Documentation - [ ] Chore/CI - [x] Core (Go) - [x] Transports (HTTP) - [x] Providers/Integrations - [x] Plugins - [x] UI (Next.js) - [x] Docs Unit Tests - [x] Schema serialization/deserialization - [x] Request transformation (Bifrost → OpenAI format) - [x] Response transformation (OpenAI → Bifrost format) - [x] Stream chunk accumulation - [x] Cache key generation Integration Tests - [x] End-to-end image generation (non-streaming) - [x] End-to-end streaming image generation - [x] Fallback to secondary provider - [x] Cache hit/miss scenarios - [x] Error handling (rate limits, invalid prompts) Load Tests - [x] Concurrent image generation requests - [x] Stream memory usage under load - [x] Cache performance at scale End-to-end image generation ```sh curl -X POST http://localhost:8080/v1/images/generations \ -H "Content-Type: 
application/json" \ -H "Authorization: Bearer $API_KEY" \ -d '{ "model": "openai/dall-e-3", "prompt": "A serene Japanese garden with cherry blossoms", "size": "1024x1024", "quality": "high", "response_format": "b64_json" }' ``` Expected Outcome: ```json { "id": "abc123", "created": 1699999999, "model": "dall-e-3", "data": [ { "b64_json": "iVBORw0KGgo...", "revised_prompt": "A tranquil Japanese garden featuring blooming cherry blossom trees...", "index": 0 } ], "usage": { "prompt_tokens": 15, "total_tokens": 15 }, "extra_fields": { "provider": "openai", "latency_ms": 8500, "cache_debug": null } } ``` Unit Tests: ```sh go test -v github.com/maximhq/bifrost/core/providers/openai -run TestImage go test -v github.com/maximhq/bifrost/core -run TestImage ``` Expected outcome: PASS Note: Might need to replace remote paths with local modules in go.mod (Once PR is merged, this directive is not needed) ```sh replace github.com/maximhq/bifrost/core => ../../core ``` N/A - [ ] Yes - [x] No Closes #950 N/A - [x] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [x] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable
1 parent d2ff684 commit dc6f958

File tree

130 files changed

+8338
-487
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

130 files changed

+8338
-487
lines changed

core/bifrost.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,94 @@ func (bifrost *Bifrost) TranscriptionStreamRequest(ctx *schemas.BifrostContext,
10051005
return bifrost.handleStreamRequest(ctx, bifrostReq)
10061006
}
10071007

1008+
// ImageGenerationRequest sends an image generation request to the specified provider.
1009+
func (bifrost *Bifrost) ImageGenerationRequest(ctx *schemas.BifrostContext,
1010+
req *schemas.BifrostImageGenerationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) {
1011+
if req == nil {
1012+
return nil, &schemas.BifrostError{
1013+
IsBifrostError: false,
1014+
Error: &schemas.ErrorField{
1015+
Message: "image generation request is nil",
1016+
},
1017+
ExtraFields: schemas.BifrostErrorExtraFields{
1018+
RequestType: schemas.ImageGenerationRequest,
1019+
},
1020+
}
1021+
}
1022+
if req.Input == nil || req.Input.Prompt == "" {
1023+
return nil, &schemas.BifrostError{
1024+
IsBifrostError: false,
1025+
Error: &schemas.ErrorField{
1026+
Message: "prompt not provided for image generation request",
1027+
},
1028+
ExtraFields: schemas.BifrostErrorExtraFields{
1029+
RequestType: schemas.ImageGenerationRequest,
1030+
Provider: req.Provider,
1031+
ModelRequested: req.Model,
1032+
},
1033+
}
1034+
}
1035+
1036+
bifrostReq := bifrost.getBifrostRequest()
1037+
bifrostReq.RequestType = schemas.ImageGenerationRequest
1038+
bifrostReq.ImageGenerationRequest = req
1039+
1040+
response, err := bifrost.handleRequest(ctx, bifrostReq)
1041+
if err != nil {
1042+
return nil, err
1043+
}
1044+
if response == nil || response.ImageGenerationResponse == nil {
1045+
return nil, &schemas.BifrostError{
1046+
IsBifrostError: false,
1047+
Error: &schemas.ErrorField{
1048+
Message: "received nil response from provider",
1049+
},
1050+
ExtraFields: schemas.BifrostErrorExtraFields{
1051+
RequestType: schemas.ImageGenerationRequest,
1052+
Provider: req.Provider,
1053+
ModelRequested: req.Model,
1054+
},
1055+
}
1056+
}
1057+
1058+
return response.ImageGenerationResponse, nil
1059+
}
1060+
1061+
// ImageGenerationStreamRequest sends an image generation stream request to the specified provider.
1062+
func (bifrost *Bifrost) ImageGenerationStreamRequest(ctx *schemas.BifrostContext,
1063+
req *schemas.BifrostImageGenerationRequest) (chan *schemas.BifrostStream, *schemas.BifrostError) {
1064+
if req == nil {
1065+
return nil, &schemas.BifrostError{
1066+
IsBifrostError: false,
1067+
Error: &schemas.ErrorField{
1068+
Message: "image generation stream request is nil",
1069+
},
1070+
ExtraFields: schemas.BifrostErrorExtraFields{
1071+
RequestType: schemas.ImageGenerationStreamRequest,
1072+
},
1073+
}
1074+
}
1075+
if req.Input == nil || req.Input.Prompt == "" {
1076+
return nil, &schemas.BifrostError{
1077+
IsBifrostError: false,
1078+
Error: &schemas.ErrorField{
1079+
Message: "prompt not provided for image generation stream request",
1080+
},
1081+
ExtraFields: schemas.BifrostErrorExtraFields{
1082+
RequestType: schemas.ImageGenerationStreamRequest,
1083+
Provider: req.Provider,
1084+
ModelRequested: req.Model,
1085+
},
1086+
}
1087+
}
1088+
1089+
bifrostReq := bifrost.getBifrostRequest()
1090+
bifrostReq.RequestType = schemas.ImageGenerationStreamRequest
1091+
bifrostReq.ImageGenerationRequest = req
1092+
1093+
return bifrost.handleStreamRequest(ctx, bifrostReq)
1094+
}
1095+
10081096
// BatchCreateRequest creates a new batch job for asynchronous processing.
10091097
func (bifrost *Bifrost) BatchCreateRequest(ctx *schemas.BifrostContext, req *schemas.BifrostBatchCreateRequest) (*schemas.BifrostBatchCreateResponse, *schemas.BifrostError) {
10101098
if req == nil {
@@ -2320,6 +2408,12 @@ func (bifrost *Bifrost) prepareFallbackRequest(req *schemas.BifrostRequest, fall
23202408
tmp.Model = fallback.Model
23212409
fallbackReq.TranscriptionRequest = &tmp
23222410
}
2411+
if req.ImageGenerationRequest != nil {
2412+
tmp := *req.ImageGenerationRequest
2413+
tmp.Provider = fallback.Provider
2414+
tmp.Model = fallback.Model
2415+
fallbackReq.ImageGenerationRequest = &tmp
2416+
}
23232417

23242418
return &fallbackReq
23252419
}
@@ -2761,6 +2855,9 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem
27612855
if streamMsg.BifrostTranscriptionStreamResponse != nil {
27622856
bifrostResponse.TranscriptionStreamResponse = streamMsg.BifrostTranscriptionStreamResponse
27632857
}
2858+
if streamMsg.BifrostImageGenerationStreamResponse != nil {
2859+
bifrostResponse.ImageGenerationStreamResponse = streamMsg.BifrostImageGenerationStreamResponse
2860+
}
27642861

27652862
// Run post hooks on the stream message
27662863
processedResponse, processedError := pipelinePostHookRunner(ctx, bifrostResponse, streamMsg.BifrostError)
@@ -2772,6 +2869,7 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem
27722869
streamResponse.BifrostResponsesStreamResponse = processedResponse.ResponsesStreamResponse
27732870
streamResponse.BifrostSpeechStreamResponse = processedResponse.SpeechStreamResponse
27742871
streamResponse.BifrostTranscriptionStreamResponse = processedResponse.TranscriptionStreamResponse
2872+
streamResponse.BifrostImageGenerationStreamResponse = processedResponse.ImageGenerationStreamResponse
27752873
}
27762874
if processedError != nil {
27772875
streamResponse.BifrostError = processedError
@@ -3246,6 +3344,12 @@ func (bifrost *Bifrost) handleProviderRequest(provider schemas.Provider, req *Ch
32463344
return nil, bifrostError
32473345
}
32483346
response.TranscriptionResponse = transcriptionResponse
3347+
case schemas.ImageGenerationRequest:
3348+
imageResponse, bifrostError := provider.ImageGeneration(req.Context, key, req.BifrostRequest.ImageGenerationRequest)
3349+
if bifrostError != nil {
3350+
return nil, bifrostError
3351+
}
3352+
response.ImageGenerationResponse = imageResponse
32493353
case schemas.FileUploadRequest:
32503354
fileUploadResponse, bifrostError := provider.FileUpload(req.Context, key, req.BifrostRequest.FileUploadRequest)
32513355
if bifrostError != nil {
@@ -3336,6 +3440,8 @@ func (bifrost *Bifrost) handleProviderStreamRequest(provider schemas.Provider, r
33363440
return provider.SpeechStream(req.Context, postHookRunner, key, req.BifrostRequest.SpeechRequest)
33373441
case schemas.TranscriptionStreamRequest:
33383442
return provider.TranscriptionStream(req.Context, postHookRunner, key, req.BifrostRequest.TranscriptionRequest)
3443+
case schemas.ImageGenerationStreamRequest:
3444+
return provider.ImageGenerationStream(req.Context, postHookRunner, key, req.BifrostRequest.ImageGenerationRequest)
33393445
default:
33403446
_, model, _ := req.BifrostRequest.GetRequestFields()
33413447
return nil, &schemas.BifrostError{
@@ -3651,6 +3757,7 @@ func resetBifrostRequest(req *schemas.BifrostRequest) {
36513757
req.EmbeddingRequest = nil
36523758
req.SpeechRequest = nil
36533759
req.TranscriptionRequest = nil
3760+
req.ImageGenerationRequest = nil
36543761
req.FileUploadRequest = nil
36553762
req.FileListRequest = nil
36563763
req.FileRetrieveRequest = nil

core/changelog.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
- chore: added case-insensitive helper methods for header and query parameter lookups in HTTPRequest
2-
- feat: added support for path parameter lookups in HTTPRequest
3-
- fix: missing request type in error response for anthropic SDK integration
1+
feat: added image generation request and response support

core/internal/testutil/account.go

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ type TestScenarios struct {
4444
Reasoning bool // Reasoning/thinking functionality via Responses API
4545
PromptCaching bool // Prompt caching functionality
4646
ListModels bool // List available models functionality
47+
ImageGeneration bool // Image generation functionality
48+
ImageGenerationStream bool // Streaming image generation functionality
4749
BatchCreate bool // Batch API create functionality
4850
BatchList bool // Batch API list functionality
4951
BatchRetrieve bool // Batch API retrieve functionality
@@ -79,6 +81,8 @@ type ComprehensiveTestConfig struct {
7981
SpeechSynthesisFallbacks []schemas.Fallback // for speech synthesis tests
8082
EmbeddingFallbacks []schemas.Fallback // for embedding tests
8183
SkipReason string // Reason to skip certain tests
84+
ImageGenerationModel string // Model for image generation
85+
ImageGenerationFallbacks []schemas.Fallback // Fallbacks for image generation
8286
ExternalTTSProvider schemas.ModelProvider // External TTS provider to use for testing
8387
ExternalTTSModel string // External TTS model to use for testing
8488
BatchExtraParams map[string]interface{} // Extra params for batch operations (e.g., role_arn, output_s3_uri for Bedrock)
@@ -223,6 +227,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context,
223227
"gpt-4o-backup": "gpt-4o-3",
224228
"claude-opus-4-5": "claude-opus-4-5",
225229
"o1": "o1",
230+
"gpt-image-1": "gpt-image-1",
226231
"text-embedding-ada-002": "text-embedding-ada-002",
227232
},
228233
ClientID: bifrost.Ptr(os.Getenv("AZURE_CLIENT_ID")),
@@ -250,7 +255,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context,
250255
return []schemas.Key{
251256
{
252257
Value: os.Getenv("VERTEX_API_KEY"),
253-
Models: []string{"text-multilingual-embedding-002", "google/gemini-2.0-flash-001"},
258+
Models: []string{"text-multilingual-embedding-002", "google/gemini-2.0-flash-001", "gemini-2.5-flash-image", "imagen-4.0-generate-001"},
254259
Weight: 1.0,
255260
VertexKeyConfig: &schemas.VertexKeyConfig{
256261
ProjectID: os.Getenv("VERTEX_PROJECT_ID"),
@@ -669,6 +674,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
669674
PromptCachingModel: "gpt-4.1",
670675
TranscriptionModel: "whisper-1",
671676
SpeechSynthesisModel: "tts-1",
677+
ImageGenerationModel: "gpt-image-1",
672678
ChatAudioModel: "gpt-4o-mini-audio-preview",
673679
Scenarios: TestScenarios{
674680
TextCompletion: false, // Not supported
@@ -688,6 +694,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
688694
SpeechSynthesisStream: true, // OpenAI supports streaming TTS
689695
Transcription: true, // OpenAI supports STT with Whisper
690696
TranscriptionStream: true, // OpenAI supports streaming STT
697+
ImageGeneration: true, // OpenAI supports image generation with DALL-E
698+
ImageGenerationStream: true, // OpenAI supports streaming image generation
691699
Embedding: true,
692700
Reasoning: true, // OpenAI supports reasoning via o1 models
693701
ListModels: true,
@@ -730,6 +738,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
730738
Transcription: false, // Not supported
731739
TranscriptionStream: false, // Not supported
732740
Embedding: false,
741+
ImageGeneration: false,
742+
ImageGenerationStream: false,
733743
ListModels: true,
734744
BatchCreate: true, // Anthropic supports batch API
735745
BatchList: true, // Anthropic supports batch API
@@ -764,6 +774,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
764774
Transcription: false, // Not supported
765775
TranscriptionStream: false, // Not supported
766776
Embedding: true,
777+
ImageGeneration: false,
778+
ImageGenerationStream: false,
767779
ListModels: true,
768780
BatchCreate: true, // Bedrock supports batch via Model Invocation Jobs (requires S3 config)
769781
BatchList: true, // Bedrock supports listing batch jobs
@@ -797,6 +809,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
797809
ImageBase64: false, // Check if supported
798810
MultipleImages: false, // Check if supported
799811
CompleteEnd2End: true,
812+
ImageGeneration: false,
813+
ImageGenerationStream: false,
800814
SpeechSynthesis: false, // Not supported
801815
SpeechSynthesisStream: false, // Not supported
802816
Transcription: false, // Not supported
@@ -815,6 +829,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
815829
ChatAudioModel: "gpt-4o-mini-audio-preview",
816830
TranscriptionModel: "whisper-1",
817831
SpeechSynthesisModel: "gpt-4o-mini-tts",
832+
ImageGenerationModel: "gpt-image-1",
818833
Scenarios: TestScenarios{
819834
TextCompletion: false, // Not supported
820835
SimpleChat: true,
@@ -833,6 +848,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
833848
Transcription: true, // Supported via whisper-1
834849
TranscriptionStream: false, // Not properly supported yet by Azure
835850
Embedding: true,
851+
ImageGeneration: true,
852+
ImageGenerationStream: true,
836853
ListModels: true,
837854
BatchCreate: true, // Azure supports batch API
838855
BatchList: true, // Azure supports batch API
@@ -851,9 +868,10 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
851868
},
852869
},
853870
{
854-
Provider: schemas.Vertex,
855-
ChatModel: "gemini-pro",
856-
TextModel: "", // Vertex focuses on chat
871+
Provider: schemas.Vertex,
872+
ChatModel: "gemini-pro",
873+
TextModel: "", // Vertex focuses on chat
874+
ImageGenerationModel: "imagen-4.0-generate-001",
857875
Scenarios: TestScenarios{
858876
TextCompletion: false, // Not typical
859877
SimpleChat: true,
@@ -867,6 +885,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
867885
ImageBase64: true,
868886
MultipleImages: true,
869887
CompleteEnd2End: true,
888+
ImageGeneration: true,
889+
ImageGenerationStream: false,
870890
SpeechSynthesis: false, // Not supported
871891
SpeechSynthesisStream: false, // Not supported
872892
Transcription: false, // Not supported
@@ -900,6 +920,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
900920
Transcription: true, // Supported via voxtral-mini-latest
901921
TranscriptionStream: true, // Supported via voxtral-mini-latest
902922
Embedding: true,
923+
ImageGeneration: false,
924+
ImageGenerationStream: false,
903925
ListModels: true,
904926
},
905927
Fallbacks: []schemas.Fallback{
@@ -928,6 +950,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
928950
Transcription: false, // Not supported
929951
TranscriptionStream: false, // Not supported
930952
Embedding: false,
953+
ImageGeneration: false,
954+
ImageGenerationStream: false,
931955
ListModels: true,
932956
},
933957
Fallbacks: []schemas.Fallback{
@@ -956,6 +980,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
956980
Transcription: false, // Not supported
957981
TranscriptionStream: false, // Not supported
958982
Embedding: false,
983+
ImageGeneration: false,
984+
ImageGenerationStream: false,
959985
ListModels: true,
960986
},
961987
Fallbacks: []schemas.Fallback{
@@ -997,6 +1023,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
9971023
TranscriptionModel: "gemini-2.5-flash",
9981024
SpeechSynthesisModel: "gemini-2.5-flash-preview-tts",
9991025
EmbeddingModel: "text-embedding-004",
1026+
ImageGenerationModel: "imagen-4.0-generate-001",
10001027
Scenarios: TestScenarios{
10011028
TextCompletion: false, // Not supported
10021029
SimpleChat: true,
@@ -1015,6 +1042,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10151042
Transcription: true,
10161043
TranscriptionStream: true,
10171044
Embedding: true,
1045+
ImageGeneration: true,
1046+
ImageGenerationStream: false,
10181047
ListModels: true,
10191048
BatchCreate: true,
10201049
BatchList: true,
@@ -1048,6 +1077,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10481077
ImageBase64: true,
10491078
MultipleImages: true,
10501079
CompleteEnd2End: true,
1080+
ImageGeneration: false,
1081+
ImageGenerationStream: false,
10511082
SpeechSynthesis: false,
10521083
SpeechSynthesisStream: false,
10531084
Transcription: false,
@@ -1066,6 +1097,7 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10661097
EmbeddingModel: "sambanova/intfloat/e5-mistral-7b-instruct",
10671098
TranscriptionModel: "fal-ai/openai/whisper-large-v3",
10681099
SpeechSynthesisModel: "fal-ai/hexgrad/Kokoro-82M",
1100+
ImageGenerationModel: "fal-ai/fal-ai/flux-2",
10691101
Scenarios: TestScenarios{
10701102
TextCompletion: false,
10711103
TextCompletionStream: false,
@@ -1082,6 +1114,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10821114
MultipleImages: true,
10831115
CompleteEnd2End: true,
10841116
Embedding: true,
1117+
ImageGeneration: true,
1118+
ImageGenerationStream: true,
10851119
Transcription: true,
10861120
TranscriptionStream: false,
10871121
SpeechSynthesis: true,
@@ -1094,9 +1128,10 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
10941128
},
10951129
},
10961130
{
1097-
Provider: schemas.XAI,
1098-
ChatModel: "grok-4-0709",
1099-
TextModel: "", // XAI focuses on chat
1131+
Provider: schemas.XAI,
1132+
ChatModel: "grok-4-0709",
1133+
TextModel: "", // XAI focuses on chat
1134+
ImageGenerationModel: "grok-2-image",
11001135
Scenarios: TestScenarios{
11011136
TextCompletion: false, // Not typical
11021137
SimpleChat: true,
@@ -1116,6 +1151,8 @@ var AllProviderConfigs = []ComprehensiveTestConfig{
11161151
TranscriptionStream: false, // Not supported
11171152
Embedding: false, // Not supported
11181153
ListModels: true,
1154+
ImageGeneration: true,
1155+
ImageGenerationStream: false,
11191156
},
11201157
},
11211158
}

0 commit comments

Comments
 (0)