diff --git a/.devcontainer/.zshrc b/.devcontainer/.zshrc
deleted file mode 100644
index 34ac85d..0000000
--- a/.devcontainer/.zshrc
+++ /dev/null
@@ -1,111 +0,0 @@
-# If you come from bash you might have to change your $PATH.
-export PATH=$HOME/bin:$HOME/.local/bin:/usr/local/bin:/opt/aarch64-linux-musl-cross/bin:/opt/x86_64-linux-musl-cross/bin:$PATH
-
-# Path to your Oh My Zsh installation.
-export ZSH=$HOME/.oh-my-zsh
-
-# Set name of the theme to load --- if set to "random", it will
-# load a random theme each time Oh My Zsh is loaded, in which case,
-# to know which specific one was loaded, run: echo $RANDOM_THEME
-# See https://github.com/ohmyzsh/ohmyzsh/wiki/Themes
-ZSH_THEME="powerlevel10k/powerlevel10k"
-
-# Set list of themes to pick from when loading at random
-# Setting this variable when ZSH_THEME="powerlevel10k/powerlevel10k"
-# a theme from this variable instead of looking in $ZSH/themes/
-# If set to an empty array, this variable will have no effect.
-# ZSH_THEME_RANDOM_CANDIDATES=( "robbyrussell" "agnoster" )
-
-# Uncomment the following line to use case-sensitive completion.
-# CASE_SENSITIVE="true"
-
-# Uncomment the following line to use hyphen-insensitive completion.
-# Case-sensitive completion must be off. _ and - will be interchangeable.
-# HYPHEN_INSENSITIVE="true"
-
-# Uncomment one of the following lines to change the auto-update behavior
-# zstyle ':omz:update' mode disabled # disable automatic updates
-# zstyle ':omz:update' mode auto # update automatically without asking
-# zstyle ':omz:update' mode reminder # just remind me to update when it's time
-
-# Uncomment the following line to change how often to auto-update (in days).
-# zstyle ':omz:update' frequency 13
-
-# Uncomment the following line if pasting URLs and other text is messed up.
-# DISABLE_MAGIC_FUNCTIONS="true"
-
-# Uncomment the following line to disable colors in ls.
-# DISABLE_LS_COLORS="true"
-
-# Uncomment the following line to disable auto-setting terminal title.
-# DISABLE_AUTO_TITLE="true"
-
-# Uncomment the following line to enable command auto-correction.
-# ENABLE_CORRECTION="true"
-
-# Uncomment the following line to display red dots whilst waiting for completion.
-# You can also set it to another string to have that shown instead of the default red dots.
-# e.g. COMPLETION_WAITING_DOTS="%F{yellow}waiting...%f"
-# Caution: this setting can cause issues with multiline prompts in zsh < 5.7.1 (see #5765)
-# COMPLETION_WAITING_DOTS="true"
-
-# Uncomment the following line if you want to disable marking untracked files
-# under VCS as dirty. This makes repository status check for large repositories
-# much, much faster.
-# DISABLE_UNTRACKED_FILES_DIRTY="true"
-
-# Uncomment the following line if you want to change the command execution time
-# stamp shown in the history command output.
-# You can set one of the optional three formats:
-# "mm/dd/yyyy"|"dd.mm.yyyy"|"yyyy-mm-dd"
-# or set a custom format using the strftime function format specifications,
-# see 'man strftime' for details.
-# HIST_STAMPS="mm/dd/yyyy"
-
-# Would you like to use another custom folder than $ZSH/custom?
-# ZSH_CUSTOM=/path/to/new-custom-folder
-
-# Which plugins would you like to load?
-# Standard plugins can be found in $ZSH/plugins/
-# Custom plugins may be added to $ZSH_CUSTOM/plugins/
-# Example format: plugins=(rails git textmate ruby lighthouse)
-# Add wisely, as too many plugins slow down shell startup.
-plugins=(git)
-
-source $ZSH/oh-my-zsh.sh
-
-# User configuration
-
-# export MANPATH="/usr/local/man:$MANPATH"
-
-# You may need to manually set your language environment
-# export LANG=en_US.UTF-8
-
-# Preferred editor for local and remote sessions
-# if [[ -n $SSH_CONNECTION ]]; then
-# export EDITOR='vim'
-# else
-# export EDITOR='mvim'
-# fi
-
-# Compilation flags
-# export ARCHFLAGS="-arch $(uname -m)"
-
-# Set personal aliases, overriding those provided by Oh My Zsh libs,
-# plugins, and themes. Aliases can be placed here, though Oh My Zsh
-# users are encouraged to define aliases within a top-level file in
-# the $ZSH_CUSTOM folder, with .zsh extension. Examples:
-# - $ZSH_CUSTOM/aliases.zsh
-# - $ZSH_CUSTOM/macos.zsh
-# For a full list of active aliases, run `alias`.
-#
-# Example aliases
-# alias zshconfig="mate ~/.zshrc"
-# alias ohmyzsh="mate ~/.oh-my-zsh"
-DISABLE_AUTO_UPDATE=true
-DISABLE_UPDATE_PROMPT=true
-[[ ! -f ~/.p10k.zsh ]] || source ~/.p10k.zsh
-
-POWERLEVEL9K_DISABLE_CONFIGURATION_WIZARD=true
-
-eval "$(task --completion zsh)"
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 5dba761..5119189 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,9 +1,14 @@
FROM mcr.microsoft.com/devcontainers/rust:1.0.20-bookworm
-ENV ZSH_CUSTOM=/home/vscode/.oh-my-zsh/custom \
- TASK_VERSION=v3.41.0
+ENV TASK_VERSION=v3.43.3
RUN apt-get update && \
+ # Install GitHub CLI
+ curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
+ chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \
+ echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \
+ apt-get update && \
+ apt-get install -y gh && \
# Install nodejs and npm
curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - && \
# Install Task
@@ -14,6 +19,12 @@ RUN apt-get update && \
RUN npm install -g semantic-release @semantic-release/changelog @semantic-release/exec @semantic-release/git @semantic-release/github conventional-changelog-conventionalcommits
-RUN git clone --depth=1 https://github.com/romkatv/powerlevel10k.git ${ZSH_CUSTOM}/themes/powerlevel10k
-
USER vscode
+
+# Use Powerlevel10k theme
+RUN git clone --depth=1 https://github.com/romkatv/powerlevel10k.git /home/vscode/.powerlevel10k && \
+ echo 'source /home/vscode/.powerlevel10k/powerlevel10k.zsh-theme' >> /home/vscode/.zshrc && \
+ echo 'POWERLEVEL9K_DISABLE_CONFIGURATION_WIZARD=true' >> /home/vscode/.zshrc
+
+RUN echo "source <(task --completion zsh)" >> /home/vscode/.zshrc
+RUN echo "source <(gh completion -s zsh)" >> /home/vscode/.zshrc
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 1a25dc1..71eaf46 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,6 +1,9 @@
{
"name": "Debian with Rust",
"dockerFile": "Dockerfile",
+ "features": {
+ "ghcr.io/devcontainers/features/docker-in-docker:latest": {}
+ },
"customizations": {
"vscode": {
"extensions": [
@@ -33,14 +36,46 @@
"dev.containers.copyGitConfig": true,
"githubPullRequests.experimental.chat": true,
"githubPullRequests.experimental.notificationsView": true,
- "files.insertFinalNewline": true
+ "files.insertFinalNewline": true,
+ "github.copilot.enable": {
+ "*": true
+ },
+ "github.copilot.advanced": {
+ "authProvider": "github"
+ },
+ "github.copilot.chat.codeGeneration.useInstructionFiles": true,
+ "github.copilot.chat.codeGeneration.instructions": [
+ {
+ "file": ".github/copilot-instructions.md"
+ },
+ {
+ "file": "../README.md"
+ }
+ ],
+ "github.copilot.chat.commitMessageGeneration.instructions": [
+ {
+ "text": "Always use conventional commit message format."
+ }
+ ],
+ "mcp": {
+ "servers": {
+ "context7": {
+ "command": "docker",
+ "args": [
+ "run",
+ "-i",
+ "--rm",
+ "node:lts",
+ "npx",
+ "-y",
+ "@upstash/context7-mcp@latest"
+ ]
+ }
+ }
+ }
}
}
},
- "mounts": [
- "source=${localWorkspaceFolder}/.devcontainer/.zshrc,target=/home/vscode/.zshrc,type=bind,consistency=cached",
- "source=${localWorkspaceFolder}/.devcontainer/launch.json,target=/workspaces/inference-gateway/rust-sdk/.vscode/launch.json,type=bind,consistency=cached"
- ],
"remoteEnv": {
"GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}"
}
diff --git a/.devcontainer/launch.json b/.devcontainer/launch.json
deleted file mode 100644
index c4f7a08..0000000
--- a/.devcontainer/launch.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
- "version": "0.2.0",
- "configurations": [
- {
- "type": "lldb",
- "request": "launch",
- "name": "Debug",
- "env": {
- "RUST_BACKTRACE": "1",
- "RUST_LOG": "debug"
- },
- "cwd": "${workspaceFolder}"
- }
- ]
-}
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..c7b4a51
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,49 @@
+# Custom Instructions for Copilot
+
+Today is May 26, 2025.
+
+- Always use context7 to check for the latest updates, features, or best practices of a library relevant to the task at hand.
+- Always prefer Table-Driven Testing when writing tests.
+- Always use Early Returns: Favor early returns to simplify logic and avoid deep nesting with if-else structures.
+- Always prefer switch statements over if-else chains: Use switch statements for cleaner and more readable code when checking multiple conditions.
+- Always run `task analyse` to ensure code quality and catch potential issues before committing.
+- Always run `task lint` before committing code to ensure it adheres to the project's linting rules.
+- Always run `task test` before committing code to ensure all tests pass.
+- Always search for the simplest solution first before considering more complex alternatives.
+- Always prefer type safety over dynamic typing: Use strong typing and interfaces to ensure type safety and reduce runtime errors.
+- When working on MCP (Model Context Protocol) related tasks, always refer to the official MCP documentation and examples for guidance and ensure you run `task jrpc-mcp-schema-download` and `task generate` to keep the MCP Golang types up to date.
+- When possible, code to an interface so it's easier to mock in tests.
+- When writing tests, each test case should have its own isolated mock server and mock dependencies so it's easier to understand and maintain.
+
+## Development Workflow
+
+### Configuration Changes
+
+When adding new configuration fields:
+
+1. Run `task oas-download` - OpenAPI is the source of truth - readonly file.
+2. If new schemas were added to openapi.yaml, update internal/openapi/schemas.go to include the new schemas
+3. Run `task lint` to ensure code quality
+4. Run `task analyse` to catch potential issues
+5. Run `task test` to ensure all tests pass
+6. Update the README.md file or any documentation files with the recently added implementation
+
+## Available Tools and MCPs
+
+- context7 - Helps by finding the latest updates, features, or best practices of a library relevant to the task at hand.
+
+## Related Repositories
+
+- [Inference Gateway](https://github.com/inference-gateway)
+ - [Inference Gateway UI](https://github.com/inference-gateway/ui)
+ - [Go SDK](https://github.com/inference-gateway/go-sdk)
+ - [Rust SDK](https://github.com/inference-gateway/rust-sdk)
+ - [TypeScript SDK](https://github.com/inference-gateway/typescript-sdk)
+ - [Documentation](https://github.com/inference-gateway/docs)
+
+## MCP Useful links
+
+- [Introduction](https://modelcontextprotocol.io/introduction)
+- [Specification](https://modelcontextprotocol.io/specification)
+- [Examples](https://modelcontextprotocol.io/examples)
+- [Schema](https://raw.githubusercontent.com/modelcontextprotocol/modelcontextprotocol/refs/heads/main/schema/draft/schema.json)
diff --git a/README.md b/README.md
index 50516d3..b89acd7 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@ An SDK written in Rust for the [Inference Gateway](https://github.com/inference-
- [Creating a Client](#creating-a-client)
- [Listing Models](#listing-models)
- [Listing Models from a specific provider](#listing-models-from-a-specific-provider)
+ - [Listing MCP Tools](#listing-mcp-tools)
- [Generating Content](#generating-content)
- [Streaming Content](#streaming-content)
- [Tool-Use](#tool-use)
@@ -32,6 +33,7 @@ use inference_gateway_sdk::{
InferenceGatewayAPI,
InferenceGatewayClient,
ListModelsResponse,
+ ListToolsResponse,
Message,
Provider,
MessageRole
@@ -137,6 +139,41 @@ for model in response.data {
// ...Rest of the main function
```
+### Listing MCP Tools
+
+To list all available MCP (Model Context Protocol) tools from all configured MCP servers, use the `list_tools` method:
+
+```rust
+use inference_gateway_sdk::{
+ GatewayError,
+ InferenceGatewayAPI,
+ InferenceGatewayClient,
+ ListToolsResponse,
+};
+use log::info;
+
+#[tokio::main]
+async fn main() -> Result<(), GatewayError> {
+ // ...Create a client
+
+ // List all MCP tools from all configured servers
+ let response: ListToolsResponse = client.list_tools().await?;
+ info!("Found {} MCP tools", response.data.len());
+
+ for tool in response.data {
+ info!("Tool: {} from server: {}", tool.name, tool.server);
+ info!("Description: {}", tool.description);
+ if let Some(schema) = &tool.input_schema {
+ info!("Input schema: {}", schema);
+ }
+ }
+
+ Ok(())
+}
+```
+
+Note: This functionality requires that MCP servers are configured and exposed in your Inference Gateway instance. If MCP is not exposed, you'll receive a `403 Forbidden` error.
+
### Generating Content
To generate content using a model, use the `generate_content` method:
diff --git a/openapi.yaml b/openapi.yaml
index b7c4528..3340141 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7,27 +7,43 @@ info:
OpenAI, Groq, Ollama, and other providers are supported.
OpenAI compatible API for using with existing clients.
Unified API for all providers.
+ contact:
+ name: Inference Gateway
+ url: https://inference-gateway.github.io/docs/
version: 1.0.0
license:
name: MIT
url: https://github.com/inference-gateway/inference-gateway/blob/main/LICENSE
servers:
- url: http://localhost:8080
+ description: Default server without version prefix for healthcheck and proxy endpoints
+ x-server-tags: ["Health", "Proxy"]
+ - url: http://localhost:8080/v1
+ description: Default server with version prefix for listing models and chat completions
+ x-server-tags: ["Models", "Completions"]
+ - url: https://api.inference-gateway.local/v1
+ description: Local server with version prefix for listing models and chat completions
+ x-server-tags: ["Models", "Completions"]
tags:
- name: Models
description: List and describe the various models available in the API.
- name: Completions
description: Generate completions from the models.
+ - name: Tools
+ description: List and manage MCP tools.
- name: Proxy
description: Proxy requests to provider endpoints.
- name: Health
description: Health check
paths:
- /v1/models:
+ /models:
get:
operationId: listModels
tags:
- Models
+ description: |
+ Lists the currently available models, and provides basic information
+ about each one such as the owner and availability.
summary:
Lists the currently available models, and provides basic information
about each one such as the owner and availability.
@@ -38,7 +54,7 @@ paths:
in: query
required: false
schema:
- $ref: "#/components/schemas/Providers"
+ $ref: "#/components/schemas/Provider"
description: Specific provider to query (optional)
responses:
"200":
@@ -53,53 +69,64 @@ paths:
value:
object: "list"
data:
- - id: "gpt-4o"
+ - id: "openai/gpt-4o"
object: "model"
created: 1686935002
owned_by: "openai"
- - id: "llama-3.3-70b-versatile"
+ served_by: "openai"
+ - id: "groq/llama-3.3-70b-versatile"
object: "model"
created: 1723651281
owned_by: "groq"
- - id: "claude-3-opus-20240229"
+ served_by: "groq"
+ - id: "anthropic/claude-3-opus-20240229"
object: "model"
created: 1708905600
owned_by: "anthropic"
- - id: "command-r"
+ served_by: "anthropic"
+ - id: "cohere/command-r"
object: "model"
created: 1707868800
owned_by: "cohere"
- - id: "phi3:3.8b"
+ served_by: "cohere"
+ - id: "ollama/phi3:3.8b"
object: "model"
created: 1718441600
owned_by: "ollama"
+ served_by: "ollama"
singleProvider:
summary: Models from a specific provider
value:
object: "list"
data:
- - id: "gpt-4o"
+ - id: "openai/gpt-4o"
object: "model"
created: 1686935002
owned_by: "openai"
- - id: "gpt-4-turbo"
+ served_by: "openai"
+ - id: "openai/gpt-4-turbo"
object: "model"
created: 1687882410
owned_by: "openai"
- - id: "gpt-3.5-turbo"
+ served_by: "openai"
+ - id: "openai/gpt-3.5-turbo"
object: "model"
created: 1677649963
owned_by: "openai"
+ served_by: "openai"
"401":
$ref: "#/components/responses/Unauthorized"
"500":
$ref: "#/components/responses/InternalError"
- /v1/chat/completions:
+ /chat/completions:
post:
- summary: Create a chat completion
- description: Creates a completion for the chat message with the specified provider
+ operationId: createChatCompletion
tags:
- Completions
+ description: |
+ Generates a chat completion based on the provided input.
+ The completion can be streamed to the client as it is generated.
+ summary: Create a chat completion
security:
- bearerAuth: []
parameters:
@@ -107,93 +134,56 @@ paths:
in: query
required: false
schema:
- $ref: "#/components/schemas/Providers"
+ $ref: "#/components/schemas/Provider"
description: Specific provider to use (default determined by model)
requestBody:
- required: true
- content:
- application/json:
- schema:
- type: object
- required:
- - model
- - messages
- properties:
- model:
- type: string
- description: Model ID to use
- messages:
- type: array
- items:
- $ref: "#/components/schemas/Message"
- temperature:
- type: number
- format: float
- default: 0.7
- stream:
- type: boolean
- default: false
- tools:
- type: array
- items:
- type: object
- max_tokens:
- type: integer
+ $ref: "#/components/requestBodies/CreateChatCompletionRequest"
responses:
"200":
description: Successful response
content:
application/json:
schema:
- type: object
- properties:
- id:
- type: string
- object:
- type: string
- example: "chat.completion"
- created:
- type: integer
- format: int64
- model:
- type: string
- choices:
- type: array
- items:
- type: object
- properties:
- index:
- type: integer
- message:
- $ref: "#/components/schemas/Message"
- finish_reason:
- type: string
- enum: [stop, length, tool_calls, content_filter]
- usage:
- type: object
- properties:
- prompt_tokens:
- type: integer
- completion_tokens:
- type: integer
- total_tokens:
- type: integer
+ $ref: "#/components/schemas/CreateChatCompletionResponse"
text/event-stream:
schema:
- type: string
+ $ref: "#/components/schemas/SSEvent"
"400":
$ref: "#/components/responses/BadRequest"
"401":
$ref: "#/components/responses/Unauthorized"
"500":
$ref: "#/components/responses/InternalError"
+ /mcp/tools:
+ get:
+ operationId: listTools
+ tags:
+ - Tools
+ description: |
+ Lists the currently available MCP tools. Only accessible when MCP_EXPOSE is enabled.
+ summary: Lists the currently available MCP tools
+ security:
+ - bearerAuth: []
+ responses:
+ "200":
+ description: Successful response
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ListToolsResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/MCPNotExposed"
+ "500":
+ $ref: "#/components/responses/InternalError"
/proxy/{provider}/{path}:
parameters:
- name: provider
in: path
required: true
schema:
- $ref: "#/components/schemas/Providers"
+ $ref: "#/components/schemas/Provider"
- name: path
in: path
required: true
@@ -203,10 +193,14 @@ paths:
type: string
description: The remaining path to proxy to the provider
get:
- summary: Proxy GET request to provider
operationId: proxyGet
tags:
- Proxy
+ description: |
+ Proxy GET request to provider
+ The request body depends on the specific provider and endpoint being called.
+ If you decide to use this approach, please follow the provider-specific documentation.
+ summary: Proxy GET request to provider
responses:
"200":
$ref: "#/components/responses/ProviderResponse"
@@ -219,10 +213,14 @@ paths:
security:
- bearerAuth: []
post:
- summary: Proxy POST request to provider
operationId: proxyPost
tags:
- Proxy
+ description: |
+ Proxy POST request to provider
+ The request body depends on the specific provider and endpoint being called.
+ If you decide to use this approach, please follow the provider-specific documentation.
+ summary: Proxy POST request to provider
requestBody:
$ref: "#/components/requestBodies/ProviderRequest"
responses:
@@ -237,10 +235,14 @@ paths:
security:
- bearerAuth: []
put:
- summary: Proxy PUT request to provider
operationId: proxyPut
tags:
- Proxy
+ description: |
+ Proxy PUT request to provider
+ The request body depends on the specific provider and endpoint being called.
+ If you decide to use this approach, please follow the provider-specific documentation.
+ summary: Proxy PUT request to provider
requestBody:
$ref: "#/components/requestBodies/ProviderRequest"
responses:
@@ -255,10 +257,14 @@ paths:
security:
- bearerAuth: []
delete:
- summary: Proxy DELETE request to provider
operationId: proxyDelete
tags:
- Proxy
+ description: |
+ Proxy DELETE request to provider
+ The request body depends on the specific provider and endpoint being called.
+ If you decide to use this approach, please follow the provider-specific documentation.
+ summary: Proxy DELETE request to provider
responses:
"200":
$ref: "#/components/responses/ProviderResponse"
@@ -271,10 +277,14 @@ paths:
security:
- bearerAuth: []
patch:
- summary: Proxy PATCH request to provider
operationId: proxyPatch
tags:
- Proxy
+ description: |
+ Proxy PATCH request to provider
+ The request body depends on the specific provider and endpoint being called.
+ If you decide to use this approach, please follow the provider-specific documentation.
+ summary: Proxy PATCH request to provider
requestBody:
$ref: "#/components/requestBodies/ProviderRequest"
responses:
@@ -291,9 +301,12 @@ paths:
/health:
get:
operationId: healthCheck
- summary: Health check
tags:
- Health
+ description: |
+ Health check endpoint
+ Returns a 200 status code if the service is healthy
+ summary: Health check
responses:
"200":
description: Health check successful
@@ -322,7 +335,7 @@ components:
type: string
temperature:
type: number
- format: float64
+ format: float
default: 0.7
examples:
openai:
@@ -369,6 +382,14 @@ components:
application/json:
schema:
$ref: "#/components/schemas/Error"
+ MCPNotExposed:
+ description: MCP tools endpoint is not exposed
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ example:
+ error: "MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable."
ProviderResponse:
description: |
ProviderResponse depends on the specific provider and endpoint being called
@@ -409,14 +430,7 @@ components:
To enable authentication, set ENABLE_AUTH to true.
When enabled, requests must include a valid JWT token in the Authorization header.
schemas:
- Endpoints:
- type: object
- properties:
- models:
- type: string
- chat:
- type: string
- Providers:
+ Provider:
type: string
enum:
- ollama
@@ -425,6 +439,7 @@ components:
- cloudflare
- cohere
- anthropic
+ - deepseek
x-provider-configs:
ollama:
id: "ollama"
@@ -504,6 +519,19 @@ components:
name: "chat_completions"
method: "POST"
endpoint: "/v1/chat/completions"
+ deepseek:
+ id: "deepseek"
+ url: "https://api.deepseek.com"
+ auth_type: "bearer"
+ endpoints:
+ models:
+ name: "list_models"
+ method: "GET"
+ endpoint: "/models"
+ chat:
+ name: "chat_completions"
+ method: "POST"
+ endpoint: "/chat/completions"
ProviderSpecificResponse:
type: object
description: |
@@ -542,7 +570,6 @@ components:
]
}
```
- additionalProperties: true
ProviderAuthType:
type: string
description: Authentication type for providers
@@ -551,6 +578,34 @@ components:
- xheader
- query
- none
+ SSEvent:
+ type: object
+ properties:
+ event:
+ type: string
+ enum:
+ - message-start
+ - stream-start
+ - content-start
+ - content-delta
+ - content-end
+ - message-end
+ - stream-end
+ data:
+ type: string
+ format: byte
+ retry:
+ type: integer
+ Endpoints:
+ type: object
+ properties:
+ models:
+ type: string
+ chat:
+ type: string
+ required:
+ - models
+ - chat
Error:
type: object
properties:
@@ -578,8 +633,12 @@ components:
$ref: "#/components/schemas/ChatCompletionMessageToolCall"
tool_call_id:
type: string
+ reasoning_content:
+ type: string
+ description: The reasoning content of the chunk message.
reasoning:
type: string
+ description: The reasoning of the chunk message. Same as reasoning_content.
required:
- role
- content
@@ -597,13 +656,19 @@ components:
owned_by:
type: string
served_by:
- type: string
+ $ref: "#/components/schemas/Provider"
+ required:
+ - id
+ - object
+ - created
+ - owned_by
+ - served_by
ListModelsResponse:
type: object
description: Response structure for listing models
properties:
provider:
- type: string
+ $ref: "#/components/schemas/Provider"
object:
type: string
data:
@@ -611,6 +676,56 @@ components:
items:
$ref: "#/components/schemas/Model"
default: []
+ required:
+ - object
+ - data
+ ListToolsResponse:
+ type: object
+ description: Response structure for listing MCP tools
+ properties:
+ object:
+ type: string
+ description: Always "list"
+ example: "list"
+ data:
+ type: array
+ items:
+ $ref: "#/components/schemas/MCPTool"
+ default: []
+ description: Array of available MCP tools
+ required:
+ - object
+ - data
+ MCPTool:
+ type: object
+ description: An MCP tool definition
+ properties:
+ name:
+ type: string
+ description: The name of the tool
+ example: "read_file"
+ description:
+ type: string
+ description: A description of what the tool does
+ example: "Read content from a file"
+ server:
+ type: string
+ description: The MCP server that provides this tool
+ example: "http://mcp-filesystem-server:8083/mcp"
+ input_schema:
+ type: object
+ description: JSON schema for the tool's input parameters
+ example:
+ type: "object"
+ properties:
+ file_path:
+ type: "string"
+ description: "Path to the file to read"
+ required: ["file_path"]
+ required:
+ - name
+ - description
+ - server
FunctionObject:
type: object
properties:
@@ -658,26 +773,6 @@ components:
documentation about the format.
Omitting `parameters` defines a function with an empty parameter list.
- properties:
- type:
- type: string
- description: The type of the parameters. Currently, only `object` is supported.
- properties:
- type: object
- description: The properties of the parameters.
- additionalProperties:
- type: object
- description: The schema for the parameter.
- additionalProperties: true
- required:
- type: array
- items:
- type: string
- description: The required properties of the parameters.
- additionalProperties:
- type: boolean
- default: false
- description: Whether additional properties are allowed.
additionalProperties: true
ChatCompletionToolType:
type: string
@@ -721,7 +816,8 @@ components:
usage statistics for the entire request, and the `choices` field
will always be an empty array. All other chunks will also include a
`usage` field, but with a null value.
- default: true
+ required:
+ - include_usage
CreateChatCompletionRequest:
type: object
properties:
@@ -758,6 +854,14 @@ components:
are supported.
items:
$ref: "#/components/schemas/ChatCompletionTool"
+ reasoning_format:
+ type: string
+ description: >
+ The format of the reasoning content. Can be `raw` or `parsed`.
+
+ When specified as raw some reasoning models will output tags.
+ When specified as parsed the model will output the reasoning under
+ `reasoning` or `reasoning_content` attribute.
required:
- model
- messages
@@ -793,16 +897,6 @@ components:
- id
- type
- function
- EventType:
- type: string
- enum:
- - message-start
- - stream-start
- - content-start
- - content-delta
- - content-end
- - message-end
- - stream-end
ChatCompletionChoice:
type: object
properties:
@@ -910,6 +1004,12 @@ components:
content:
type: string
description: The contents of the chunk message.
+ reasoning_content:
+ type: string
+ description: The reasoning content of the chunk message.
+ reasoning:
+ type: string
+ description: The reasoning of the chunk message. Same as reasoning_content.
tool_calls:
type: array
items:
@@ -919,6 +1019,9 @@ components:
refusal:
type: string
description: The refusal message generated by the model.
+ required:
+ - content
+ - role
ChatCompletionMessageToolCallChunk:
type: object
properties:
@@ -1051,106 +1154,25 @@ components:
description: The object type, which is always `chat.completion.chunk`.
usage:
$ref: "#/components/schemas/CompletionUsage"
+ reasoning_format:
+ type: string
+ description: >
+ The format of the reasoning content. Can be `raw` or `parsed`.
+
+ When specified as raw some reasoning models will output tags.
+ When specified as parsed the model will output the reasoning under reasoning_content.
required:
- choices
- created
- id
- model
- object
- CreateCompletionResponse:
- type: object
- description: >
- Represents a completion response from the API. Note: both the streamed
- and non-streamed response objects share the same shape (unlike the chat
- endpoint).
- properties:
- id:
- type: string
- description: A unique identifier for the completion.
- choices:
- type: array
- description:
- The list of completion choices the model generated for the input
- prompt.
- items:
- type: object
- required:
- - finish_reason
- - index
- - logprobs
- - text
- properties:
- finish_reason:
- type: string
- description: >
- The reason the model stopped generating tokens. This will be
- `stop` if the model hit a natural stop point or a provided
- stop sequence,
-
- `length` if the maximum number of tokens specified in the
- request was reached,
-
- or `content_filter` if content was omitted due to a flag from
- our content filters.
- enum:
- - stop
- - length
- - content_filter
- index:
- type: integer
- logprobs:
- type: object
- properties:
- text_offset:
- type: array
- items:
- type: integer
- token_logprobs:
- type: array
- items:
- type: number
- tokens:
- type: array
- items:
- type: string
- top_logprobs:
- type: array
- items:
- type: object
- additionalProperties:
- type: number
- text:
- type: string
- created:
- type: integer
- description: The Unix timestamp (in seconds) of when the completion was created.
- model:
- type: string
- description: The model used for completion.
- object:
- type: string
- description: The object type, which is always "text_completion"
- enum:
- - text_completion
- usage:
- $ref: "#/components/schemas/CompletionUsage"
- required:
- - id
- - object
- - created
- - model
- - choices
Config:
x-config:
sections:
- general:
title: "General settings"
settings:
- - name: application_name
- env: "APPLICATION_NAME"
- type: string
- default: "inference-gateway"
- description: "The name of the application"
- name: environment
env: "ENVIRONMENT"
type: string
@@ -1166,6 +1188,53 @@ components:
type: bool
default: "false"
description: "Enable authentication"
+ - mcp:
+ title: "Model Context Protocol (MCP)"
+ settings:
+ - name: mcp_enable
+ env: "MCP_ENABLE"
+ type: bool
+ default: "false"
+ description: "Enable MCP"
+ - name: mcp_expose
+ env: "MCP_EXPOSE"
+ type: bool
+ default: "false"
+ description: "Expose MCP tools endpoint"
+ - name: mcp_servers
+ env: "MCP_SERVERS"
+ type: string
+ description: "List of MCP servers"
+ - name: mcp_client_timeout
+ env: "MCP_CLIENT_TIMEOUT"
+ type: time.Duration
+ default: "5s"
+ description: "MCP client HTTP timeout"
+ - name: mcp_dial_timeout
+ env: "MCP_DIAL_TIMEOUT"
+ type: time.Duration
+ default: "3s"
+ description: "MCP client dial timeout"
+ - name: mcp_tls_handshake_timeout
+ env: "MCP_TLS_HANDSHAKE_TIMEOUT"
+ type: time.Duration
+ default: "3s"
+ description: "MCP client TLS handshake timeout"
+ - name: mcp_response_header_timeout
+ env: "MCP_RESPONSE_HEADER_TIMEOUT"
+ type: time.Duration
+ default: "3s"
+ description: "MCP client response header timeout"
+ - name: mcp_expect_continue_timeout
+ env: "MCP_EXPECT_CONTINUE_TIMEOUT"
+ type: time.Duration
+ default: "1s"
+ description: "MCP client expect continue timeout"
+ - name: mcp_request_timeout
+ env: "MCP_REQUEST_TIMEOUT"
+ type: time.Duration
+ default: "5s"
+ description: "MCP client request timeout for initialize and tool calls"
- oidc:
title: "OpenID Connect"
settings:
@@ -1312,3 +1381,13 @@ components:
type: string
description: "OpenAI API Key"
secret: true
+ - name: deepseek_api_url
+ env: "DEEPSEEK_API_URL"
+ type: string
+ default: "https://api.deepseek.com"
+ description: "DeepSeek API URL"
+ - name: deepseek_api_key
+ env: "DEEPSEEK_API_KEY"
+ type: string
+ description: "DeepSeek API Key"
+ secret: true
diff --git a/src/lib.rs b/src/lib.rs
index 8fb2aff..aa444cf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -27,6 +27,9 @@ pub enum GatewayError {
#[error("Unauthorized: {0}")]
Unauthorized(String),
+ #[error("Forbidden: {0}")]
+ Forbidden(String),
+
#[error("Bad request: {0}")]
BadRequest(String),
@@ -84,6 +87,29 @@ pub struct ListModelsResponse {
pub data: Vec,
}
+/// An MCP (Model Context Protocol) tool definition
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct MCPTool {
+ /// The name of the tool (e.g. `read_file`)
+ pub name: String,
+ /// A human-readable description of what the tool does
+ pub description: String,
+ /// The MCP server that provides this tool (e.g. `http://mcp-filesystem-server:8083/mcp`)
+ pub server: String,
+ /// JSON schema for the tool's input parameters; skipped during serialization when `None`
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub input_schema: Option,
+}
+
+/// Response structure for listing MCP tools (the body returned by `list_tools`)
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ListToolsResponse {
+ /// Response object type, always "list"
+ pub object: String,
+ /// The MCP tools currently available through the gateway
+ pub data: Vec,
+}
+
/// Supported LLM providers
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
#[serde(rename_all = "lowercase")]
@@ -100,6 +126,8 @@ pub enum Provider {
Cohere,
#[serde(alias = "Anthropic", alias = "ANTHROPIC")]
Anthropic,
+ #[serde(alias = "Deepseek", alias = "DEEPSEEK")]
+ Deepseek,
}
impl fmt::Display for Provider {
@@ -111,6 +139,7 @@ impl fmt::Display for Provider {
Provider::Cloudflare => write!(f, "cloudflare"),
Provider::Cohere => write!(f, "cohere"),
Provider::Anthropic => write!(f, "anthropic"),
+ Provider::Deepseek => write!(f, "deepseek"),
}
}
}
@@ -126,6 +155,7 @@ impl TryFrom<&str> for Provider {
"cloudflare" => Ok(Self::Cloudflare),
"cohere" => Ok(Self::Cohere),
"anthropic" => Ok(Self::Anthropic),
+ "deepseek" => Ok(Self::Deepseek),
_ => Err(GatewayError::BadRequest(format!("Unknown provider: {}", s))),
}
}
@@ -422,6 +452,18 @@ pub trait InferenceGatewayAPI {
messages: Vec,
) -> impl Stream- > + Send;
+ /// Lists available MCP tools
+ ///
+ /// # Errors
+ /// - Returns [`GatewayError::Unauthorized`] if authentication fails
+ /// - Returns [`GatewayError::BadRequest`] if the request is malformed
+ /// - Returns [`GatewayError::InternalError`] if the server has an error
+ /// - Returns [`GatewayError::Other`] for other errors
+ ///
+ /// # Returns
+ /// A list of available MCP tools. Only accessible when EXPOSE_MCP is enabled.
+ fn list_tools(&self) -> impl Future