3 changes: 3 additions & 0 deletions core/bifrost.go
@@ -36,6 +36,7 @@ import (
"github.com/maximhq/bifrost/core/providers/perplexity"
"github.com/maximhq/bifrost/core/providers/replicate"
"github.com/maximhq/bifrost/core/providers/sgl"
"github.com/maximhq/bifrost/core/providers/vllm"
providerUtils "github.com/maximhq/bifrost/core/providers/utils"
"github.com/maximhq/bifrost/core/providers/vertex"
"github.com/maximhq/bifrost/core/providers/xai"
@@ -3091,6 +3092,8 @@ func (bifrost *Bifrost) createBaseProvider(providerKey schemas.ModelProvider, co
return xai.NewXAIProvider(config, bifrost.logger)
case schemas.Replicate:
return replicate.NewReplicateProvider(config, bifrost.logger)
case schemas.VLLM:
return vllm.NewVLLMProvider(config, bifrost.logger)
default:
return nil, fmt.Errorf("unsupported provider: %s", targetProviderKey)
}
1 change: 1 addition & 0 deletions core/changelog.md
@@ -0,0 +1 @@
- feat: add vllm provider support
15 changes: 15 additions & 0 deletions core/internal/llmtests/account.go
@@ -143,6 +143,7 @@ func (account *ComprehensiveTestAccount) GetConfiguredProviders() ([]schemas.Mod
schemas.Nebius,
schemas.XAI,
schemas.Replicate,
schemas.VLLM,
ProviderOpenAICustom,
}, nil
}
@@ -630,6 +631,20 @@ func (account *ComprehensiveTestAccount) GetConfigForProvider(providerKey schema
BufferSize: 10,
},
}, nil
case schemas.VLLM:
return &schemas.ProviderConfig{
NetworkConfig: schemas.NetworkConfig{
BaseURL: os.Getenv("VLLM_BASE_URL"),
DefaultRequestTimeoutInSeconds: 120,
MaxRetries: 10, // vllm is stable
RetryBackoffInitial: 5 * time.Second,
RetryBackoffMax: 3 * time.Minute,
},
ConcurrencyAndBufferSize: schemas.ConcurrencyAndBufferSize{
Concurrency: Concurrency,
BufferSize: 10,
},
}, nil
case schemas.Gemini:
return &schemas.ProviderConfig{
NetworkConfig: schemas.NetworkConfig{
32 changes: 28 additions & 4 deletions core/providers/openai/openai.go
@@ -2169,10 +2169,12 @@ func (provider *OpenAIProvider) TranscriptionStream(ctx *schemas.BifrostContext,
authHeader,
provider.networkConfig.ExtraHeaders,
providerUtils.ShouldSendBackRawResponse(ctx, provider.sendBackRawResponse),
false,
provider.GetProviderKey(),
postHookRunner,
nil,
nil,
nil,
provider.logger,
)
}
@@ -2187,8 +2189,10 @@ func HandleOpenAITranscriptionStreamRequest(
authHeader map[string]string,
extraHeaders map[string]string,
sendBackRawResponse bool,
accumulateText bool,
providerName schemas.ModelProvider,
postHookRunner schemas.PostHookRunner,
customChunkParser func([]byte) (*schemas.BifrostTranscriptionStreamResponse, bool),
postRequestConverter func(*OpenAITranscriptionRequest) *OpenAITranscriptionRequest,
postResponseConverter func(*schemas.BifrostTranscriptionStreamResponse) *schemas.BifrostTranscriptionStreamResponse,
logger schemas.Logger,
@@ -2291,6 +2295,7 @@ func HandleOpenAITranscriptionStreamRequest(

startTime := time.Now()
lastChunkTime := startTime
var fullTranscriptionText string

for scanner.Scan() {
// If context was cancelled/timed out, let defer handle it
@@ -2340,9 +2345,23 @@ }
}

var response schemas.BifrostTranscriptionStreamResponse
if err := sonic.Unmarshal([]byte(jsonData), &response); err != nil {
logger.Warn("Failed to parse stream response: %v", err)
continue
if customChunkParser != nil {
customChunk, ok := customChunkParser([]byte(jsonData))
if ok {
response = *customChunk
} else {
logger.Warn("Failed to parse stream response: %v", err)
continue
}
Comment on lines 2347 to +2355
Contributor
⚠️ Potential issue | 🔴 Critical

Fix undefined variable in custom chunk parser branch.

Line 2353 references err, which is undefined in this scope (compile error). Also guard against ok == true with a nil chunk to prevent panics.

🛠️ Proposed fix
-			customChunk, ok := customChunkParser([]byte(jsonData))
-			if ok {
-				response = *customChunk
-			} else {
-				logger.Warn("Failed to parse stream response: %v", err)
-				continue
-			}
+			customChunk, ok := customChunkParser([]byte(jsonData))
+			if !ok || customChunk == nil {
+				logger.Warn("customChunkParser returned no chunk")
+				continue
+			}
+			response = *customChunk
🤖 Prompt for AI Agents
In `@core/providers/openai/openai.go` around lines 2347-2355: in the customChunkParser branch (customChunkParser, response, schemas.BifrostTranscriptionStreamResponse, logger.Warn), remove the undefined err reference and guard against nil: call customChunkParser([]byte(jsonData)), then check ok && customChunk != nil before doing response = *customChunk; otherwise log a simple warning like "Failed to parse stream response" (no err var) and continue. Ensure the code never dereferences a nil customChunk.

} else {
if err := sonic.Unmarshal([]byte(jsonData), &response); err != nil {
logger.Warn("Failed to parse stream response: %v", err)
continue
}
}

if accumulateText && response.Delta != nil {
fullTranscriptionText += *response.Delta
}

if postResponseConverter != nil {
@@ -2368,9 +2387,14 @@ func HandleOpenAITranscriptionStreamRequest(
response.ExtraFields.RawResponse = jsonData
}

if response.Usage != nil {
if response.Usage != nil || response.Type == schemas.TranscriptionStreamResponseTypeDone {
response.ExtraFields.Latency = time.Since(startTime).Milliseconds()
ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true)

if accumulateText {
response.Text = fullTranscriptionText
}

providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, nil, &response, nil), responseChan)
return
}
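For reference, a self-contained sketch of what the new accumulateText flag does in the loop above: buffer each Delta and stamp the accumulated transcript onto the terminal chunk. The chunk type is a stand-in for illustration only, not the real schemas.BifrostTranscriptionStreamResponse.

package main

import "fmt"

// chunk stands in for schemas.BifrostTranscriptionStreamResponse.
type chunk struct {
	Delta *string // incremental text; nil on non-delta events
	Done  bool    // true on the terminal event
	Text  string  // full transcript, filled on the terminal event
}

// accumulate mirrors the stream loop: append each delta to a buffer
// and attach the accumulated text to the final chunk before emitting.
func accumulate(in []chunk, accumulateText bool) []chunk {
	var full string
	out := make([]chunk, 0, len(in))
	for _, c := range in {
		if accumulateText && c.Delta != nil {
			full += *c.Delta
		}
		if c.Done && accumulateText {
			c.Text = full
		}
		out = append(out, c)
	}
	return out
}

func main() {
	hel, lo := "hel", "lo"
	stream := []chunk{{Delta: &hel}, {Delta: &lo}, {Done: true}}
	for _, c := range accumulate(stream, true) {
		fmt.Printf("done=%v text=%q\n", c.Done, c.Text) // final line: done=true text="hello"
	}
}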
5 changes: 5 additions & 0 deletions core/providers/utils/utils.go
@@ -673,6 +673,11 @@ func HandleProviderResponse[T any](responseBody []byte, response *T, requestBody
}
}

var errorResp schemas.BifrostError
Collaborator
Any reason for this? We're already handling errors below, right?

Contributor Author
vLLM sends an ErrorField object as the error (with a 200 status); we're only handling unmarshalling errors below

if err := sonic.Unmarshal(responseBody, &errorResp); err == nil && errorResp.Error != nil && errorResp.Error.Message != "" {
return nil, nil, &errorResp
}

var wg sync.WaitGroup
var structuredErr, rawRequestErr, rawResponseErr error

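To make the point above concrete, a minimal sketch of the new check against a hypothetical 200-status error body; the stand-in structs mirror the shape of schemas.BifrostError, and the exact vLLM payload is an assumption.

package main

import (
	"fmt"

	"github.com/bytedance/sonic"
)

// Stand-ins mirroring the shape of schemas.BifrostError / ErrorField.
type errorField struct {
	Message string `json:"message"`
}

type bifrostError struct {
	Error *errorField `json:"error"`
}

func main() {
	// Hypothetical body vLLM might return with an HTTP 200 status.
	body := []byte(`{"error":{"message":"model not loaded"}}`)

	var errResp bifrostError
	if err := sonic.Unmarshal(body, &errResp); err == nil && errResp.Error != nil && errResp.Error.Message != "" {
		fmt.Println("provider error despite 200:", errResp.Error.Message) // matches the new early return
		return
	}
	fmt.Println("normal response")
}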
19 changes: 19 additions & 0 deletions core/providers/vllm/types.go
@@ -0,0 +1,19 @@
package vllm

import (
schemas "github.com/maximhq/bifrost/core/schemas"
)

// vLLMTranscriptionStreamChunk represents a single transcription streaming chunk from vLLM.
type vLLMTranscriptionStreamChunk struct {
Object string `json:"object"`
Choices []struct {
Delta struct {
Content *string `json:"content"`
ReasoningContent *string `json:"reasoning_content"`
} `json:"delta"`
FinishReason *string `json:"finish_reason,omitempty"`
StopReason *string `json:"stop_reason,omitempty"`
} `json:"choices"`
Usage *schemas.TranscriptionUsage `json:"usage,omitempty"`
}
40 changes: 40 additions & 0 deletions core/providers/vllm/utils.go
@@ -0,0 +1,40 @@
package vllm

import (
"github.com/bytedance/sonic"
schemas "github.com/maximhq/bifrost/core/schemas"
)

// parseVLLMTranscriptionStreamChunk parses vLLM's transcription stream JSON and returns
// a BifrostTranscriptionStreamResponse. It returns (nil, false) if the payload is not
// valid vLLM format or has no content to emit.
func parseVLLMTranscriptionStreamChunk(jsonData []byte) (*schemas.BifrostTranscriptionStreamResponse, bool) {
var chunk vLLMTranscriptionStreamChunk
response := &schemas.BifrostTranscriptionStreamResponse{}
if err := sonic.Unmarshal(jsonData, &chunk); err != nil {
return nil, false
}
// Done chunk: has usage (e.g. final event)
if chunk.Usage != nil {
return &schemas.BifrostTranscriptionStreamResponse{
Type: schemas.TranscriptionStreamResponseTypeDone,
Usage: chunk.Usage,
}, true
}
// Delta chunk: has choices[].delta.content
if len(chunk.Choices) == 0 || chunk.Choices[0].Delta.Content == nil {
return nil, false
}
if len(chunk.Choices) > 0 {
reason := chunk.Choices[0].FinishReason
if reason == nil && chunk.Choices[0].StopReason != nil {
reason = chunk.Choices[0].StopReason
}
if reason != nil && *reason == "stop" {
response.Text = *chunk.Choices[0].Delta.Content
response.Type = schemas.TranscriptionStreamResponseTypeDone
}
response.Delta = chunk.Choices[0].Delta.Content
}
Comment on lines +24 to +38
Contributor
⚠️ Potential issue | 🟠 Major

Set a non-empty Type for delta chunks.

Delta responses currently return with Type == "", which can emit invalid stream events (Type is required in BifrostTranscriptionStreamResponse). Set it to the delta event type used elsewhere in schemas/transcriptions.go.

✅ Suggested patch
 		if reason != nil && *reason == "stop" {
 			response.Text = *chunk.Choices[0].Delta.Content
 			response.Type = schemas.TranscriptionStreamResponseTypeDone
 		}
 		response.Delta = chunk.Choices[0].Delta.Content
+		if response.Type == "" {
+			response.Type = schemas.TranscriptionStreamResponseTypeDelta
+		}
 	}
🤖 Prompt for AI Agents
In `@core/providers/vllm/utils.go` around lines 24-38: the delta branch currently leaves response.Type empty; update the block handling chunk.Choices (in utils.go) so that when chunk.Choices[0].Delta.Content is present but not a "stop" finish, you assign response.Type = schemas.TranscriptionStreamResponseTypeDelta (and still set response.Delta = chunk.Choices[0].Delta.Content); when the finish reason is "stop", keep setting response.Type = schemas.TranscriptionStreamResponseTypeDone as already done. Adjust the logic around chunk.Choices[0].Delta.Content, FinishReason/StopReason, and response.Type to ensure every emitted delta response has a non-empty Type.

return response, true
}
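A quick in-package test sketching the parser's three paths: plain delta, stop, and usage/done. The payloads are inferred from the struct in types.go rather than captured vLLM traffic, and the first case also reproduces the empty-Type issue flagged above.

package vllm

import "testing"

func TestParseVLLMTranscriptionStreamChunk(t *testing.T) {
	cases := []string{
		// Plain delta: parser currently leaves Type empty (see review comment).
		`{"object":"transcription.chunk","choices":[{"delta":{"content":"hel"}}]}`,
		// Final delta with finish_reason "stop": Type becomes done, Text is set.
		`{"object":"transcription.chunk","choices":[{"delta":{"content":"lo"},"finish_reason":"stop"}]}`,
		// Usage-bearing chunk: mapped straight to the done event.
		`{"object":"transcription.chunk","choices":[],"usage":{}}`,
	}
	for i, payload := range cases {
		resp, ok := parseVLLMTranscriptionStreamChunk([]byte(payload))
		if !ok || resp == nil {
			t.Fatalf("case %d: expected chunk to parse", i)
		}
		delta := ""
		if resp.Delta != nil {
			delta = *resp.Delta
		}
		t.Logf("case %d: type=%q delta=%q text=%q", i, resp.Type, delta, resp.Text)
	}
}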