-
Notifications
You must be signed in to change notification settings - Fork 242
feat: add vllm provider support #1649
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| - feat: add vllm provider support |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -673,6 +673,11 @@ func HandleProviderResponse[T any](responseBody []byte, response *T, requestBody | |
| } | ||
| } | ||
|
|
||
| var errorResp schemas.BifrostError | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any reason for this? We are already handling the error below, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. vLLM sends an ErrorField object as the error (in a 200 response); below we only handle unmarshalling errors. |
||
| if err := sonic.Unmarshal(responseBody, &errorResp); err == nil && errorResp.Error != nil && errorResp.Error.Message != "" { | ||
| return nil, nil, &errorResp | ||
| } | ||
|
|
||
| var wg sync.WaitGroup | ||
| var structuredErr, rawRequestErr, rawResponseErr error | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| package vllm | ||
|
|
||
| import ( | ||
| schemas "github.com/maximhq/bifrost/core/schemas" | ||
| ) | ||
|
|
||
| // vLLMTranscriptionStreamChunk represents a single transcription streaming chunk from vLLM. | ||
| type vLLMTranscriptionStreamChunk struct { | ||
| Object string `json:"object"` | ||
| Choices []struct { | ||
| Delta struct { | ||
| Content *string `json:"content"` | ||
| ReasoningContent *string `json:"reasoning_content"` | ||
| } `json:"delta"` | ||
| FinishReason *string `json:"finish_reason,omitempty"` | ||
| StopReason *string `json:"stop_reason,omitempty"` | ||
| } `json:"choices"` | ||
| Usage *schemas.TranscriptionUsage `json:"usage,omitempty"` | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| package vllm | ||
|
|
||
| import ( | ||
| "github.com/bytedance/sonic" | ||
| schemas "github.com/maximhq/bifrost/core/schemas" | ||
| ) | ||
|
|
||
| // parseVLLMTranscriptionStreamChunk parses vLLM's transcription stream JSON and returns | ||
| // a BifrostTranscriptionStreamResponse. It returns (nil, false) if the payload is not | ||
| // valid vLLM format or has no content to emit. | ||
| func parseVLLMTranscriptionStreamChunk(jsonData []byte) (*schemas.BifrostTranscriptionStreamResponse, bool) { | ||
| var chunk vLLMTranscriptionStreamChunk | ||
| response := &schemas.BifrostTranscriptionStreamResponse{} | ||
| if err := sonic.Unmarshal(jsonData, &chunk); err != nil { | ||
| return nil, false | ||
| } | ||
| // Done chunk: has usage (e.g. final event) | ||
| if chunk.Usage != nil { | ||
| return &schemas.BifrostTranscriptionStreamResponse{ | ||
| Type: schemas.TranscriptionStreamResponseTypeDone, | ||
| Usage: chunk.Usage, | ||
| }, true | ||
| } | ||
| // Delta chunk: has choices[].delta.content | ||
| if len(chunk.Choices) == 0 || chunk.Choices[0].Delta.Content == nil { | ||
| return nil, false | ||
| } | ||
| if len(chunk.Choices) > 0 { | ||
| reason := chunk.Choices[0].FinishReason | ||
| if reason == nil && chunk.Choices[0].StopReason != nil { | ||
| reason = chunk.Choices[0].StopReason | ||
| } | ||
| if reason != nil && *reason == "stop" { | ||
| response.Text = *chunk.Choices[0].Delta.Content | ||
| response.Type = schemas.TranscriptionStreamResponseTypeDone | ||
| } | ||
| response.Delta = chunk.Choices[0].Delta.Content | ||
| } | ||
|
Comment on lines
+24
to
+38
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Set a non-empty Type for delta chunks. Delta responses currently return with an empty `Type`. ✅ Suggested patch:
 if reason != nil && *reason == "stop" {
response.Text = *chunk.Choices[0].Delta.Content
response.Type = schemas.TranscriptionStreamResponseTypeDone
}
response.Delta = chunk.Choices[0].Delta.Content
+ if response.Type == "" {
+ response.Type = schemas.TranscriptionStreamResponseTypeDelta
+ }
}
🤖 Prompt for AI Agents |
||
| return response, true | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix undefined variable in custom chunk parser branch.
Line 2353 references `err`, which is undefined in this scope (compile error). Also guard against `ok == true` with a nil chunk to prevent panics.
🛠️ Proposed fix
🤖 Prompt for AI Agents