3 changes: 3 additions & 0 deletions core/bifrost.go
@@ -36,6 +36,7 @@ import (
"github.com/maximhq/bifrost/core/providers/perplexity"
"github.com/maximhq/bifrost/core/providers/replicate"
"github.com/maximhq/bifrost/core/providers/sgl"
"github.com/maximhq/bifrost/core/providers/vllm"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/providers/vertex"
"github.com/maximhq/bifrost/core/providers/xai"
@@ -3091,6 +3092,8 @@ func (bifrost *Bifrost) createBaseProvider(providerKey schemas.ModelProvider, co
return xai.NewXAIProvider(config, bifrost.logger)
case schemas.Replicate:
return replicate.NewReplicateProvider(config, bifrost.logger)
case schemas.VLLM:
return vllm.NewVLLMProvider(config, bifrost.logger)
default:
return nil, fmt.Errorf("unsupported provider: %s", targetProviderKey)
}
1 change: 1 addition & 0 deletions core/changelog.md
@@ -0,0 +1 @@
- feat: add vllm provider support
15 changes: 15 additions & 0 deletions core/internal/llmtests/account.go
@@ -143,6 +143,7 @@ func (account *ComprehensiveTestAccount) GetConfiguredProviders() ([]schemas.Mod
schemas.Nebius,
schemas.XAI,
schemas.Replicate,
schemas.VLLM,
ProviderOpenAICustom,
}, nil
}
@@ -630,6 +631,20 @@ func (account *ComprehensiveTestAccount) GetConfigForProvider(providerKey schema
BufferSize: 10,
},
}, nil
case schemas.VLLM:
return &schemas.ProviderConfig{
NetworkConfig: schemas.NetworkConfig{
BaseURL: os.Getenv("VLLM_BASE_URL"),
DefaultRequestTimeoutInSeconds: 120,
MaxRetries: 10, // vllm is stable
RetryBackoffInitial: 5 * time.Second,
RetryBackoffMax: 3 * time.Minute,
},
ConcurrencyAndBufferSize: schemas.ConcurrencyAndBufferSize{
Concurrency: Concurrency,
BufferSize: 10,
},
}, nil
case schemas.Gemini:
return &schemas.ProviderConfig{
NetworkConfig: schemas.NetworkConfig{
32 changes: 28 additions & 4 deletions core/providers/openai/openai.go
@@ -2169,10 +2169,12 @@ func (provider *OpenAIProvider) TranscriptionStream(ctx *schemas.BifrostContext,
authHeader,
provider.networkConfig.ExtraHeaders,
providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse),
false,
provider.GetProviderKey(),
postHookRunner,
nil,
nil,
nil,
provider.logger,
)
}
@@ -2187,8 +2189,10 @@ func HandleOpenAITranscriptionStreamRequest(
authHeader map[string]string,
extraHeaders map[string]string,
sendBackRawResponse bool,
accumulateText bool,
providerName schemas.ModelProvider,
postHookRunner schemas.PostHookRunner,
customChunkParser func([]byte) (*schemas.BifrostTranscriptionStreamResponse, bool),
postRequestConverter func(*OpenAITranscriptionRequest) *OpenAITranscriptionRequest,
postResponseConverter func(*schemas.BifrostTranscriptionStreamResponse) *schemas.BifrostTranscriptionStreamResponse,
logger schemas.Logger,
@@ -2291,6 +2295,7 @@ func HandleOpenAITranscriptionStreamRequest(

startTime := time.Now()
lastChunkTime := startTime
var fullTranscriptionText string

for scanner.Scan() {
// If context was cancelled/timed out, let defer handle it
@@ -2340,9 +2345,23 @@ }
}

var response schemas.BifrostTranscriptionStreamResponse
if err := sonic.Unmarshal([]byte(jsonData), &response); err != nil {
logger.Warn("Failed to parse stream response: %v", err)
continue
if customChunkParser != nil {
customChunk, ok := customChunkParser([]byte(jsonData))
if ok {
response = *customChunk
} else {
logger.Warn("Failed to parse stream response: %v", err)
continue
}
Comment on lines 2347 to +2355
Contributor
⚠️ Potential issue | 🔴 Critical

Fix undefined variable in custom chunk parser branch.

Line 2353 references err, which is undefined in this scope (compile error). Also guard against ok == true with a nil chunk to prevent panics.

🛠️ Proposed fix
-			customChunk, ok := customChunkParser([]byte(jsonData))
-			if ok {
-				response = *customChunk
-			} else {
-				logger.Warn("Failed to parse stream response: %v", err)
-				continue
-			}
+			customChunk, ok := customChunkParser([]byte(jsonData))
+			if !ok || customChunk == nil {
+				logger.Warn("customChunkParser returned no chunk")
+				continue
+			}
+			response = *customChunk
🤖 Prompt for AI Agents
In `@core/providers/openai/openai.go` around lines 2347-2355: in the customChunkParser branch (customChunkParser, response, schemas.BifrostTranscriptionStreamResponse, logger.Warn), remove the undefined err reference and guard against nil: call customChunkParser([]byte(jsonData)), then check ok && customChunk != nil before doing response = *customChunk; otherwise log a simple warning like "Failed to parse stream response" (no err var) and continue. Ensure the code never dereferences a nil customChunk.

} else {
if err := sonic.Unmarshal([]byte(jsonData), &response); err != nil {
logger.Warn("Failed to parse stream response: %v", err)
continue
}
}

if accumulateText && response.Delta != nil {
fullTranscriptionText += *response.Delta
}

if postResponseConverter != nil {
@@ -2368,9 +2387,14 @@ func HandleOpenAITranscriptionStreamRequest(
response.ExtraFields.RawResponse = jsonData
}

if response.Usage != nil {
if response.Usage != nil || response.Type == schemas.TranscriptionStreamResponseTypeDone {
response.ExtraFields.Latency = time.Since(startTime).Milliseconds()
ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)

if accumulateText {
response.Text = fullTranscriptionText
}

providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, nil, &response, nil), responseChan)
return
}
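For reference, a self-contained sketch of what the new accumulateText flag does in the loop above: buffer each Delta and stamp the accumulated transcript onto the terminal chunk. The chunk type is a stand-in for illustration only, not the real schemas.BifrostTranscriptionStreamResponse.

package main

import "fmt"

// chunk stands in for schemas.BifrostTranscriptionStreamResponse.
type chunk struct {
	Delta *string // incremental text; nil on non-delta events
	Done  bool    // true on the terminal event
	Text  string  // full transcript, filled on the terminal event
}

// accumulate mirrors the stream loop: append each delta to a buffer
// and attach the accumulated text to the final chunk before emitting.
func accumulate(in []chunk, accumulateText bool) []chunk {
	var full string
	out := make([]chunk, 0, len(in))
	for _, c := range in {
		if accumulateText && c.Delta != nil {
			full += *c.Delta
		}
		if c.Done && accumulateText {
			c.Text = full
		}
		out = append(out, c)
	}
	return out
}

func main() {
	hel, lo := "hel", "lo"
	stream := []chunk{{Delta: &hel}, {Delta: &lo}, {Done: true}}
	for _, c := range accumulate(stream, true) {
		fmt.Printf("done=%v text=%q\n", c.Done, c.Text) // final line: done=true text="hello"
	}
}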
5 changes: 5 additions & 0 deletions core/providers/utils/utils.go
@@ -673,6 +673,11 @@ func HandleProviderResponse[T any](responseBody []byte, response *T, requestBody
}
}

var errorResp schemas.BifrostError
Collaborator
Any reason for this? We're already handling errors below, right?

Contributor Author
vLLM sends an ErrorField object as the error (with a 200 status); we're only handling unmarshalling errors below

if err := sonic.Unmarshal(responseBody, &errorResp); err == nil && errorResp.Error != nil && errorResp.Error.Message != "" {
return nil, nil, &errorResp
}

var wg sync.WaitGroup
var structuredErr, rawRequestErr, rawResponseErr error

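To make the point above concrete, a minimal sketch of the new check against a hypothetical 200-status error body; the stand-in structs mirror the shape of schemas.BifrostError, and the exact vLLM payload is an assumption.

package main

import (
	"fmt"

	"github.com/bytedance/sonic"
)

// Stand-ins mirroring the shape of schemas.BifrostError / ErrorField.
type errorField struct {
	Message string `json:"message"`
}

type bifrostError struct {
	Error *errorField `json:"error"`
}

func main() {
	// Hypothetical body vLLM might return with an HTTP 200 status.
	body := []byte(`{"error":{"message":"model not loaded"}}`)

	var errResp bifrostError
	if err := sonic.Unmarshal(body, &errResp); err == nil && errResp.Error != nil && errResp.Error.Message != "" {
		fmt.Println("provider error despite 200:", errResp.Error.Message) // matches the new early return
		return
	}
	fmt.Println("normal response")
}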
19 changes: 19 additions & 0 deletions core/providers/vllm/types.go
@@ -0,0 +1,19 @@
package vllm

import (
schemas "github.com/maximhq/bifrost/core/schemas"
)

// vLLMTranscriptionStreamChunk represents a single transcription streaming chunk from vLLM.
type vLLMTranscriptionStreamChunk struct {
Object string `json:"object"`
Choices []struct {
Delta struct {
Content *string `json:"content"`
ReasoningContent *string `json:"reasoning_content"`
} `json:"delta"`
FinishReason *string `json:"finish_reason,omitempty"`
StopReason *string `json:"stop_reason,omitempty"`
} `json:"choices"`
Usage *schemas.TranscriptionUsage `json:"usage,omitempty"`
}
40 changes: 40 additions & 0 deletions core/providers/vllm/utils.go
@@ -0,0 +1,40 @@
package vllm

import (
"github.com/bytedance/sonic"
schemas "github.com/maximhq/bifrost/core/schemas"
)

// parseVLLMTranscriptionStreamChunk parses vLLM's transcription stream JSON and returns
// a BifrostTranscriptionStreamResponse. It returns (nil, false) if the payload is not
// valid vLLM format or has no content to emit.
func parseVLLMTranscriptionStreamChunk(jsonData []byte) (*schemas.BifrostTranscriptionStreamResponse, bool) {
var chunk vLLMTranscriptionStreamChunk
response := &schemas.BifrostTranscriptionStreamResponse{}
if err := sonic.Unmarshal(jsonData, &chunk); err != nil {
return nil, false
}
// Done chunk: has usage (e.g. final event)
if chunk.Usage != nil {
return &schemas.BifrostTranscriptionStreamResponse{
Type: schemas.TranscriptionStreamResponseTypeDone,
Usage: chunk.Usage,
}, true
}
// Delta chunk: has choices[].delta.content
if len(chunk.Choices) == 0 || chunk.Choices[0].Delta.Content == nil {
return nil, false
}
if len(chunk.Choices) > 0 {
reason := chunk.Choices[0].FinishReason
if reason == nil && chunk.Choices[0].StopReason != nil {
reason = chunk.Choices[0].StopReason
}
if reason != nil && *reason == "stop" {
response.Text = *chunk.Choices[0].Delta.Content
response.Type = schemas.TranscriptionStreamResponseTypeDone
}
response.Delta = chunk.Choices[0].Delta.Content
}
Comment on lines +24 to +38
Contributor
⚠️ Potential issue | 🟠 Major

Set a non-empty Type for delta chunks.

Delta responses currently return with Type == "", which can emit invalid stream events (Type is required in BifrostTranscriptionStreamResponse). Set it to the delta event type used elsewhere in schemas/transcriptions.go.

✅ Suggested patch
 		if reason != nil && *reason == "stop" {
 			response.Text = *chunk.Choices[0].Delta.Content
 			response.Type = schemas.TranscriptionStreamResponseTypeDone
 		}
 		response.Delta = chunk.Choices[0].Delta.Content
+		if response.Type == "" {
+			response.Type = schemas.TranscriptionStreamResponseTypeDelta
+		}
 	}
🤖 Prompt for AI Agents
In `@core/providers/vllm/utils.go` around lines 24-38: the delta branch currently leaves response.Type empty; update the block handling chunk.Choices (in utils.go) so that when chunk.Choices[0].Delta.Content is present but not a "stop" finish, you assign response.Type = schemas.TranscriptionStreamResponseTypeDelta (and still set response.Delta = chunk.Choices[0].Delta.Content); when the finish reason is "stop", keep setting response.Type = schemas.TranscriptionStreamResponseTypeDone as already done. Adjust the logic around chunk.Choices[0].Delta.Content, FinishReason/StopReason, and response.Type to ensure every emitted delta response has a non-empty Type.

return response, true
}
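A quick in-package test sketching the parser's three paths: plain delta, stop, and usage/done. The payloads are inferred from the struct in types.go rather than captured vLLM traffic, and the first case also reproduces the empty-Type issue flagged above.

package vllm

import "testing"

func TestParseVLLMTranscriptionStreamChunk(t *testing.T) {
	cases := []string{
		// Plain delta: parser currently leaves Type empty (see review comment).
		`{"object":"transcription.chunk","choices":[{"delta":{"content":"hel"}}]}`,
		// Final delta with finish_reason "stop": Type becomes done, Text is set.
		`{"object":"transcription.chunk","choices":[{"delta":{"content":"lo"},"finish_reason":"stop"}]}`,
		// Usage-bearing chunk: mapped straight to the done event.
		`{"object":"transcription.chunk","choices":[],"usage":{}}`,
	}
	for i, payload := range cases {
		resp, ok := parseVLLMTranscriptionStreamChunk([]byte(payload))
		if !ok || resp == nil {
			t.Fatalf("case %d: expected chunk to parse", i)
		}
		delta := ""
		if resp.Delta != nil {
			delta = *resp.Delta
		}
		t.Logf("case %d: type=%q delta=%q text=%q", i, resp.Type, delta, resp.Text)
	}
}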