diff --git a/.devcontainer/.zshrc b/.devcontainer/.zshrc deleted file mode 100644 index 34ac85d..0000000 --- a/.devcontainer/.zshrc +++ /dev/null @@ -1,111 +0,0 @@ -# If you come from bash you might have to change your $PATH. -export PATH=$HOME/bin:$HOME/.local/bin:/usr/local/bin:/opt/aarch64-linux-musl-cross/bin:/opt/x86_64-linux-musl-cross/bin:$PATH - -# Path to your Oh My Zsh installation. -export ZSH=$HOME/.oh-my-zsh - -# Set name of the theme to load --- if set to "random", it will -# load a random theme each time Oh My Zsh is loaded, in which case, -# to know which specific one was loaded, run: echo $RANDOM_THEME -# See https://github.com/ohmyzsh/ohmyzsh/wiki/Themes -ZSH_THEME="powerlevel10k/powerlevel10k" - -# Set list of themes to pick from when loading at random -# Setting this variable when ZSH_THEME="powerlevel10k/powerlevel10k" -# a theme from this variable instead of looking in $ZSH/themes/ -# If set to an empty array, this variable will have no effect. -# ZSH_THEME_RANDOM_CANDIDATES=( "robbyrussell" "agnoster" ) - -# Uncomment the following line to use case-sensitive completion. -# CASE_SENSITIVE="true" - -# Uncomment the following line to use hyphen-insensitive completion. -# Case-sensitive completion must be off. _ and - will be interchangeable. -# HYPHEN_INSENSITIVE="true" - -# Uncomment one of the following lines to change the auto-update behavior -# zstyle ':omz:update' mode disabled # disable automatic updates -# zstyle ':omz:update' mode auto # update automatically without asking -# zstyle ':omz:update' mode reminder # just remind me to update when it's time - -# Uncomment the following line to change how often to auto-update (in days). -# zstyle ':omz:update' frequency 13 - -# Uncomment the following line if pasting URLs and other text is messed up. -# DISABLE_MAGIC_FUNCTIONS="true" - -# Uncomment the following line to disable colors in ls. -# DISABLE_LS_COLORS="true" - -# Uncomment the following line to disable auto-setting terminal title. -# DISABLE_AUTO_TITLE="true" - -# Uncomment the following line to enable command auto-correction. -# ENABLE_CORRECTION="true" - -# Uncomment the following line to display red dots whilst waiting for completion. -# You can also set it to another string to have that shown instead of the default red dots. -# e.g. COMPLETION_WAITING_DOTS="%F{yellow}waiting...%f" -# Caution: this setting can cause issues with multiline prompts in zsh < 5.7.1 (see #5765) -# COMPLETION_WAITING_DOTS="true" - -# Uncomment the following line if you want to disable marking untracked files -# under VCS as dirty. This makes repository status check for large repositories -# much, much faster. -# DISABLE_UNTRACKED_FILES_DIRTY="true" - -# Uncomment the following line if you want to change the command execution time -# stamp shown in the history command output. -# You can set one of the optional three formats: -# "mm/dd/yyyy"|"dd.mm.yyyy"|"yyyy-mm-dd" -# or set a custom format using the strftime function format specifications, -# see 'man strftime' for details. -# HIST_STAMPS="mm/dd/yyyy" - -# Would you like to use another custom folder than $ZSH/custom? -# ZSH_CUSTOM=/path/to/new-custom-folder - -# Which plugins would you like to load? -# Standard plugins can be found in $ZSH/plugins/ -# Custom plugins may be added to $ZSH_CUSTOM/plugins/ -# Example format: plugins=(rails git textmate ruby lighthouse) -# Add wisely, as too many plugins slow down shell startup. 
-plugins=(git) - -source $ZSH/oh-my-zsh.sh - -# User configuration - -# export MANPATH="/usr/local/man:$MANPATH" - -# You may need to manually set your language environment -# export LANG=en_US.UTF-8 - -# Preferred editor for local and remote sessions -# if [[ -n $SSH_CONNECTION ]]; then -# export EDITOR='vim' -# else -# export EDITOR='mvim' -# fi - -# Compilation flags -# export ARCHFLAGS="-arch $(uname -m)" - -# Set personal aliases, overriding those provided by Oh My Zsh libs, -# plugins, and themes. Aliases can be placed here, though Oh My Zsh -# users are encouraged to define aliases within a top-level file in -# the $ZSH_CUSTOM folder, with .zsh extension. Examples: -# - $ZSH_CUSTOM/aliases.zsh -# - $ZSH_CUSTOM/macos.zsh -# For a full list of active aliases, run `alias`. -# -# Example aliases -# alias zshconfig="mate ~/.zshrc" -# alias ohmyzsh="mate ~/.oh-my-zsh" -DISABLE_AUTO_UPDATE=true -DISABLE_UPDATE_PROMPT=true -[[ ! -f ~/.p10k.zsh ]] || source ~/.p10k.zsh - -POWERLEVEL9K_DISABLE_CONFIGURATION_WIZARD=true - -eval "$(task --completion zsh)" diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 5dba761..5119189 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,9 +1,14 @@ FROM mcr.microsoft.com/devcontainers/rust:1.0.20-bookworm -ENV ZSH_CUSTOM=/home/vscode/.oh-my-zsh/custom \ - TASK_VERSION=v3.41.0 +ENV TASK_VERSION=v3.43.3 RUN apt-get update && \ + # Install GitHub CLI + curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \ + chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \ + apt-get update && \ + apt-get install -y gh && \ # Install nodejs and npm curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - && \ # Install Task @@ -14,6 +19,12 @@ RUN apt-get update && \ RUN npm install -g semantic-release @semantic-release/changelog @semantic-release/exec @semantic-release/git @semantic-release/github conventional-changelog-conventionalcommits -RUN git clone --depth=1 https://github.com/romkatv/powerlevel10k.git ${ZSH_CUSTOM}/themes/powerlevel10k - USER vscode + +# Use Powerlevel10k theme +RUN git clone --depth=1 https://github.com/romkatv/powerlevel10k.git /home/vscode/.powerlevel10k && \ + echo 'source /home/vscode/.powerlevel10k/powerlevel10k.zsh-theme' >> /home/vscode/.zshrc && \ + echo 'POWERLEVEL9K_DISABLE_CONFIGURATION_WIZARD=true' >> /home/vscode/.zshrc + +RUN echo "source <(task --completion zsh)" >> /home/vscode/.zshrc +RUN echo "source <(gh completion -s zsh)" >> /home/vscode/.zshrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1a25dc1..71eaf46 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,9 @@ { "name": "Debian with Rust", "dockerFile": "Dockerfile", + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:latest": {} + }, "customizations": { "vscode": { "extensions": [ @@ -33,14 +36,46 @@ "dev.containers.copyGitConfig": true, "githubPullRequests.experimental.chat": true, "githubPullRequests.experimental.notificationsView": true, - "files.insertFinalNewline": true + "files.insertFinalNewline": true, + "github.copilot.enable": { + "*": true + }, + "github.copilot.advanced": { + "authProvider": "github" + }, + 
"github.copilot.chat.codeGeneration.useInstructionFiles": true, + "github.copilot.chat.codeGeneration.instructions": [ + { + "file": ".github/copilot-instructions.md" + }, + { + "file": "../README.md" + } + ], + "github.copilot.chat.commitMessageGeneration.instructions": [ + { + "text": "Always use conventional commit message format." + } + ], + "mcp": { + "servers": { + "context7": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "node:lts", + "npx", + "-y", + "@upstash/context7-mcp@latest" + ] + } + } + } } } }, - "mounts": [ - "source=${localWorkspaceFolder}/.devcontainer/.zshrc,target=/home/vscode/.zshrc,type=bind,consistency=cached", - "source=${localWorkspaceFolder}/.devcontainer/launch.json,target=/workspaces/inference-gateway/rust-sdk/.vscode/launch.json,type=bind,consistency=cached" - ], "remoteEnv": { "GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}" } diff --git a/.devcontainer/launch.json b/.devcontainer/launch.json deleted file mode 100644 index c4f7a08..0000000 --- a/.devcontainer/launch.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "version": "0.2.0", - "configurations": [ - { - "type": "lldb", - "request": "launch", - "name": "Debug", - "env": { - "RUST_BACKTRACE": "1", - "RUST_LOG": "debug" - }, - "cwd": "${workspaceFolder}" - } - ] -} diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..c7b4a51 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,49 @@ +# Custom Instructions for Copilot + +Today is May 26, 2025. + +- Always use context7 to check for the latest updates, features, or best practices of a library relevant to the task at hand. +- Always prefer Table-Driven Testing: When writing tests. +- Always use Early Returns: Favor early returns to simplify logic and avoid deep nesting with if-else structures. +- Always prefer switch statements over if-else chains: Use switch statements for cleaner and more readable code when checking multiple conditions. +- Always run `task analyse` to ensure code quality and catch potential issues before committing. +- Always run `task lint` before committing code to ensure it adheres to the project's linting rules. +- Always run `task test` before committing code to ensure all tests pass. +- Always search for the simplest solution first before considering more complex alternatives. +- Always prefer type safety over dynamic typing: Use strong typing and interfaces to ensure type safety and reduce runtime errors. +- When working on MCP (Model Context Protocol) related tasks, always refer to the official MCP documentation and examples for guidance and ensure you run `task jrpc-mcp-schema-download` and `task generate` to keep the MCP Golang types up to date. +- When possible code to an interface so it's easier to mock in tests. +- When writing tests, each test case should have it's own isolated mock server mock dependecies so it's easier to understand and maintain. + +## Development Workflow + +### Configuration Changes + +When adding new configuration fields: + +1. Run `task oas-download` - OpenAPI is the source of truth - readonly file. +2. If added new Schemas to openapi.yaml, update internal/openapi/schemas.go to include the new schemas +3. Run `task lint` to ensure code quality +4. Run `task analyse` to catch potential issues +5. Run `task test` to ensure all tests pass +6. 
Update the README.md file or any documentation files with the recently added implementation + +## Available Tools and MCPs + +- context7 - Helps by finding the latest updates, features, or best practices of a library relevant to the task at hand. + +## Related Repositories + +- [Inference Gateway](https://github.com/inference-gateway) + - [Inference Gateway UI](https://github.com/inference-gateway/ui) + - [Go SDK](https://github.com/inference-gateway/go-sdk) + - [Rust SDK](https://github.com/inference-gateway/rust-sdk) + - [TypeScript SDK](https://github.com/inference-gateway/typescript-sdk) + - [Documentation](https://github.com/inference-gateway/docs) + +## MCP Useful links + +- [Introduction](https://modelcontextprotocol.io/introduction) +- [Specification](https://modelcontextprotocol.io/specification) +- [Examples](https://modelcontextprotocol.io/examples) +- [Schema](https://raw.githubusercontent.com/modelcontextprotocol/modelcontextprotocol/refs/heads/main/schema/draft/schema.json) diff --git a/README.md b/README.md index 50516d3..b89acd7 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ An SDK written in Rust for the [Inference Gateway](https://github.com/inference- - [Creating a Client](#creating-a-client) - [Listing Models](#listing-models) - [Listing Models from a specific provider](#listing-models-from-a-specific-provider) + - [Listing MCP Tools](#listing-mcp-tools) - [Generating Content](#generating-content) - [Streaming Content](#streaming-content) - [Tool-Use](#tool-use) @@ -32,6 +33,7 @@ use inference_gateway_sdk::{ InferenceGatewayAPI, InferenceGatewayClient, ListModelsResponse, + ListToolsResponse, Message, Provider, MessageRole @@ -137,6 +139,41 @@ for model in response.data { // ...Rest of the main function ``` +### Listing MCP Tools + +To list all available MCP (Model Context Protocol) tools from all configured MCP servers, use the `list_tools` method: + +```rust +use inference_gateway_sdk::{ + GatewayError, + InferenceGatewayAPI, + InferenceGatewayClient, + ListToolsResponse, +}; +use log::info; + +#[tokio::main] +async fn main() -> Result<(), GatewayError> { + // ...Create a client + + // List all MCP tools from all configured servers + let response: ListToolsResponse = client.list_tools().await?; + info!("Found {} MCP tools", response.data.len()); + + for tool in response.data { + info!("Tool: {} from server: {}", tool.name, tool.server); + info!("Description: {}", tool.description); + if let Some(schema) = &tool.input_schema { + info!("Input schema: {}", schema); + } + } + + Ok(()) +} +``` + +Note: This functionality requires that MCP servers are configured and exposed in your Inference Gateway instance. If MCP is not exposed, you'll receive a `403 Forbidden` error. + ### Generating Content To generate content using a model, use the `generate_content` method: diff --git a/openapi.yaml b/openapi.yaml index b7c4528..3340141 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -7,27 +7,43 @@ info: OpenAI, Groq, Ollama, and other providers are supported. OpenAI compatible API for using with existing clients. Unified API for all providers. 
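Picking up the README note above about the `403 Forbidden` response when MCP is not exposed: the SDK changes later in this diff add a `GatewayError::Forbidden` variant for exactly this case. A minimal sketch of handling it, assuming a gateway on `http://localhost:8080` (the default server from the spec below):

```rust
use inference_gateway_sdk::{GatewayError, InferenceGatewayAPI, InferenceGatewayClient};

#[tokio::main]
async fn main() {
    let client = InferenceGatewayClient::new("http://localhost:8080/v1");

    // list_tools() maps an HTTP 403 from /mcp/tools onto GatewayError::Forbidden,
    // which is what the gateway returns when it runs without MCP exposed.
    match client.list_tools().await {
        Ok(response) => println!("{} MCP tools available", response.data.len()),
        Err(GatewayError::Forbidden(msg)) => {
            eprintln!("MCP tools endpoint is not exposed: {msg}");
        }
        Err(err) => eprintln!("request failed: {err}"),
    }
}
```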
+ contact: + name: Inference Gateway + url: https://inference-gateway.github.io/docs/ version: 1.0.0 license: name: MIT url: https://github.com/inference-gateway/inference-gateway/blob/main/LICENSE servers: - url: http://localhost:8080 + description: Default server without version prefix for healthcheck and proxy and points + x-server-tags: ["Health", "Proxy"] + - url: http://localhost:8080/v1 + description: Default server with version prefix for listing models and chat completions + x-server-tags: ["Models", "Completions"] + - url: https://api.inference-gateway.local/v1 + description: Local server with version prefix for listing models and chat completions + x-server-tags: ["Models", "Completions"] tags: - name: Models description: List and describe the various models available in the API. - name: Completions description: Generate completions from the models. + - name: Tools + description: List and manage MCP tools. - name: Proxy description: Proxy requests to provider endpoints. - name: Health description: Health check paths: - /v1/models: + /models: get: operationId: listModels tags: - Models + description: | + Lists the currently available models, and provides basic information + about each one such as the owner and availability. summary: Lists the currently available models, and provides basic information about each one such as the owner and availability. @@ -38,7 +54,7 @@ paths: in: query required: false schema: - $ref: "#/components/schemas/Providers" + $ref: "#/components/schemas/Provider" description: Specific provider to query (optional) responses: "200": @@ -53,53 +69,64 @@ paths: value: object: "list" data: - - id: "gpt-4o" + - id: "openai/gpt-4o" object: "model" created: 1686935002 owned_by: "openai" - - id: "llama-3.3-70b-versatile" + served_by: "openai" + - id: "openai/llama-3.3-70b-versatile" object: "model" created: 1723651281 owned_by: "groq" - - id: "claude-3-opus-20240229" + served_by: "groq" + - id: "cohere/claude-3-opus-20240229" object: "model" created: 1708905600 owned_by: "anthropic" - - id: "command-r" + served_by: "anthropic" + - id: "cohere/command-r" object: "model" created: 1707868800 owned_by: "cohere" - - id: "phi3:3.8b" + served_by: "cohere" + - id: "ollama/phi3:3.8b" object: "model" created: 1718441600 owned_by: "ollama" + served_by: "ollama" singleProvider: summary: Models from a specific provider value: object: "list" data: - - id: "gpt-4o" + - id: "openai/gpt-4o" object: "model" created: 1686935002 owned_by: "openai" - - id: "gpt-4-turbo" + served_by: "openai" + - id: "openai/gpt-4-turbo" object: "model" created: 1687882410 owned_by: "openai" - - id: "gpt-3.5-turbo" + served_by: "openai" + - id: "openai/gpt-3.5-turbo" object: "model" created: 1677649963 owned_by: "openai" + served_by: "openai" "401": $ref: "#/components/responses/Unauthorized" "500": $ref: "#/components/responses/InternalError" - /v1/chat/completions: + /chat/completions: post: - summary: Create a chat completion - description: Creates a completion for the chat message with the specified provider + operationId: createChatCompletion tags: - Completions + description: | + Generates a chat completion based on the provided input. + The completion can be streamed to the client as it is generated. 
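The createChatCompletion operation described above accepts a CreateChatCompletionRequest whose only required fields are `model` and `messages`. As a raw-HTTP sketch of that request shape (independent of the SDK helpers shown in the README; assumes the `reqwest` crate with its `json` feature and `serde_json` are available):

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // `model` and `messages` are the only required fields of
    // CreateChatCompletionRequest; `stream` defaults to false.
    let body = json!({
        "model": "openai/gpt-4o",
        "messages": [
            { "role": "user", "content": "Say hello" }
        ],
        "stream": false
    });

    let response = reqwest::Client::new()
        .post("http://localhost:8080/v1/chat/completions")
        .json(&body)
        .send()
        .await?;

    println!("status: {}", response.status());
    println!("body: {}", response.text().await?);
    Ok(())
}
```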
+ summary: Create a chat completion security: - bearerAuth: [] parameters: @@ -107,93 +134,56 @@ paths: in: query required: false schema: - $ref: "#/components/schemas/Providers" + $ref: "#/components/schemas/Provider" description: Specific provider to use (default determined by model) requestBody: - required: true - content: - application/json: - schema: - type: object - required: - - model - - messages - properties: - model: - type: string - description: Model ID to use - messages: - type: array - items: - $ref: "#/components/schemas/Message" - temperature: - type: number - format: float - default: 0.7 - stream: - type: boolean - default: false - tools: - type: array - items: - type: object - max_tokens: - type: integer + $ref: "#/components/requestBodies/CreateChatCompletionRequest" responses: "200": description: Successful response content: application/json: schema: - type: object - properties: - id: - type: string - object: - type: string - example: "chat.completion" - created: - type: integer - format: int64 - model: - type: string - choices: - type: array - items: - type: object - properties: - index: - type: integer - message: - $ref: "#/components/schemas/Message" - finish_reason: - type: string - enum: [stop, length, tool_calls, content_filter] - usage: - type: object - properties: - prompt_tokens: - type: integer - completion_tokens: - type: integer - total_tokens: - type: integer + $ref: "#/components/schemas/CreateChatCompletionResponse" text/event-stream: schema: - type: string + $ref: "#/components/schemas/SSEvent" "400": $ref: "#/components/responses/BadRequest" "401": $ref: "#/components/responses/Unauthorized" "500": $ref: "#/components/responses/InternalError" + /mcp/tools: + get: + operationId: listTools + tags: + - Tools + description: | + Lists the currently available MCP tools. Only accessible when EXPOSE_MCP is enabled. + summary: Lists the currently available MCP tools + security: + - bearerAuth: [] + responses: + "200": + description: Successful response + content: + application/json: + schema: + $ref: "#/components/schemas/ListToolsResponse" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/MCPNotExposed" + "500": + $ref: "#/components/responses/InternalError" /proxy/{provider}/{path}: parameters: - name: provider in: path required: true schema: - $ref: "#/components/schemas/Providers" + $ref: "#/components/schemas/Provider" - name: path in: path required: true @@ -203,10 +193,14 @@ paths: type: string description: The remaining path to proxy to the provider get: - summary: Proxy GET request to provider operationId: proxyGet tags: - Proxy + description: | + Proxy GET request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy GET request to provider responses: "200": $ref: "#/components/responses/ProviderResponse" @@ -219,10 +213,14 @@ paths: security: - bearerAuth: [] post: - summary: Proxy POST request to provider operationId: proxyPost tags: - Proxy + description: | + Proxy POST request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. 
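The proxy operations above forward requests verbatim to the selected provider, so request and response bodies are provider-specific. A sketch of what such a call looks like on the wire; the provider name and remaining path (`openai`, `v1/models`) are illustrative only, and `INFERENCE_GATEWAY_TOKEN` is a hypothetical variable name, not one the gateway defines:

```rust
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // /proxy/{provider}/{path} lives on the unversioned server
    // (http://localhost:8080), per the servers list earlier in this spec.
    let token = std::env::var("INFERENCE_GATEWAY_TOKEN").unwrap_or_default();

    let response = reqwest::Client::new()
        .get("http://localhost:8080/proxy/openai/v1/models")
        // Bearer auth is only enforced when the gateway runs with ENABLE_AUTH=true.
        .bearer_auth(token)
        .send()
        .await?;

    println!("status: {}", response.status());
    Ok(())
}
```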
+ summary: Proxy POST request to provider requestBody: $ref: "#/components/requestBodies/ProviderRequest" responses: @@ -237,10 +235,14 @@ paths: security: - bearerAuth: [] put: - summary: Proxy PUT request to provider operationId: proxyPut tags: - Proxy + description: | + Proxy PUT request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy PUT request to provider requestBody: $ref: "#/components/requestBodies/ProviderRequest" responses: @@ -255,10 +257,14 @@ paths: security: - bearerAuth: [] delete: - summary: Proxy DELETE request to provider operationId: proxyDelete tags: - Proxy + description: | + Proxy DELETE request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy DELETE request to provider responses: "200": $ref: "#/components/responses/ProviderResponse" @@ -271,10 +277,14 @@ paths: security: - bearerAuth: [] patch: - summary: Proxy PATCH request to provider operationId: proxyPatch tags: - Proxy + description: | + Proxy PATCH request to provider + The request body depends on the specific provider and endpoint being called. + If you decide to use this approach, please follow the provider-specific documentations. + summary: Proxy PATCH request to provider requestBody: $ref: "#/components/requestBodies/ProviderRequest" responses: @@ -291,9 +301,12 @@ paths: /health: get: operationId: healthCheck - summary: Health check tags: - Health + description: | + Health check endpoint + Returns a 200 status code if the service is healthy + summary: Health check responses: "200": description: Health check successful @@ -322,7 +335,7 @@ components: type: string temperature: type: number - format: float64 + format: float default: 0.7 examples: openai: @@ -369,6 +382,14 @@ components: application/json: schema: $ref: "#/components/schemas/Error" + MCPNotExposed: + description: MCP tools endpoint is not exposed + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + example: + error: "MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable." ProviderResponse: description: | ProviderResponse depends on the specific provider and endpoint being called @@ -409,14 +430,7 @@ components: To enable authentication, set ENABLE_AUTH to true. When enabled, requests must include a valid JWT token in the Authorization header. 
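The bearerAuth scheme above applies to the SDK as well: the client keeps an optional token and attaches it to every request. A small sketch using the `with_token` builder exercised by the tests in this diff; `INFERENCE_GATEWAY_TOKEN` is an illustrative variable name, not something the gateway defines:

```rust
use inference_gateway_sdk::{GatewayError, InferenceGatewayAPI, InferenceGatewayClient};

#[tokio::main]
async fn main() -> Result<(), GatewayError> {
    // With ENABLE_AUTH=true on the gateway, unauthenticated calls come back
    // as GatewayError::Unauthorized, so attach the JWT up front.
    let token = std::env::var("INFERENCE_GATEWAY_TOKEN").expect("token not set");
    let client = InferenceGatewayClient::new("http://localhost:8080/v1")
        .with_token(token.as_str());

    // Any authenticated call works from here; list_tools() is used as an example.
    let tools = client.list_tools().await?;
    println!("{} MCP tools visible with this token", tools.data.len());
    Ok(())
}
```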
schemas: - Endpoints: - type: object - properties: - models: - type: string - chat: - type: string - Providers: + Provider: type: string enum: - ollama @@ -425,6 +439,7 @@ components: - cloudflare - cohere - anthropic + - deepseek x-provider-configs: ollama: id: "ollama" @@ -504,6 +519,19 @@ components: name: "chat_completions" method: "POST" endpoint: "/v1/chat/completions" + deepseek: + id: "deepseek" + url: "https://api.deepseek.com" + auth_type: "bearer" + endpoints: + models: + name: "list_models" + method: "GET" + endpoint: "/models" + chat: + name: "chat_completions" + method: "POST" + endpoint: "/chat/completions" ProviderSpecificResponse: type: object description: | @@ -542,7 +570,6 @@ components: ] } ``` - additionalProperties: true ProviderAuthType: type: string description: Authentication type for providers @@ -551,6 +578,34 @@ components: - xheader - query - none + SSEvent: + type: object + properties: + event: + type: string + enum: + - message-start + - stream-start + - content-start + - content-delta + - content-end + - message-end + - stream-end + data: + type: string + format: byte + retry: + type: integer + Endpoints: + type: object + properties: + models: + type: string + chat: + type: string + required: + - models + - chat Error: type: object properties: @@ -578,8 +633,12 @@ components: $ref: "#/components/schemas/ChatCompletionMessageToolCall" tool_call_id: type: string + reasoning_content: + type: string + description: The reasoning content of the chunk message. reasoning: type: string + description: The reasoning of the chunk message. Same as reasoning_content. required: - role - content @@ -597,13 +656,19 @@ components: owned_by: type: string served_by: - type: string + $ref: "#/components/schemas/Provider" + required: + - id + - object + - created + - owned_by + - served_by ListModelsResponse: type: object description: Response structure for listing models properties: provider: - type: string + $ref: "#/components/schemas/Provider" object: type: string data: @@ -611,6 +676,56 @@ components: items: $ref: "#/components/schemas/Model" default: [] + required: + - object + - data + ListToolsResponse: + type: object + description: Response structure for listing MCP tools + properties: + object: + type: string + description: Always "list" + example: "list" + data: + type: array + items: + $ref: "#/components/schemas/MCPTool" + default: [] + description: Array of available MCP tools + required: + - object + - data + MCPTool: + type: object + description: An MCP tool definition + properties: + name: + type: string + description: The name of the tool + example: "read_file" + description: + type: string + description: A description of what the tool does + example: "Read content from a file" + server: + type: string + description: The MCP server that provides this tool + example: "http://mcp-filesystem-server:8083/mcp" + input_schema: + type: object + description: JSON schema for the tool's input parameters + example: + type: "object" + properties: + file_path: + type: "string" + description: "Path to the file to read" + required: ["file_path"] + required: + - name + - description + - server FunctionObject: type: object properties: @@ -658,26 +773,6 @@ components: documentation about the format. Omitting `parameters` defines a function with an empty parameter list. - properties: - type: - type: string - description: The type of the parameters. Currently, only `object` is supported. - properties: - type: object - description: The properties of the parameters. 
- additionalProperties: - type: object - description: The schema for the parameter. - additionalProperties: true - required: - type: array - items: - type: string - description: The required properties of the parameters. - additionalProperties: - type: boolean - default: false - description: Whether additional properties are allowed. additionalProperties: true ChatCompletionToolType: type: string @@ -721,7 +816,8 @@ components: usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value. - default: true + required: + - include_usage CreateChatCompletionRequest: type: object properties: @@ -758,6 +854,14 @@ components: are supported. items: $ref: "#/components/schemas/ChatCompletionTool" + reasoning_format: + type: string + description: > + The format of the reasoning content. Can be `raw` or `parsed`. + + When specified as raw some reasoning models will output tags. + When specified as parsed the model will output the reasoning under + `reasoning` or `reasoning_content` attribute. required: - model - messages @@ -793,16 +897,6 @@ components: - id - type - function - EventType: - type: string - enum: - - message-start - - stream-start - - content-start - - content-delta - - content-end - - message-end - - stream-end ChatCompletionChoice: type: object properties: @@ -910,6 +1004,12 @@ components: content: type: string description: The contents of the chunk message. + reasoning_content: + type: string + description: The reasoning content of the chunk message. + reasoning: + type: string + description: The reasoning of the chunk message. Same as reasoning_content. tool_calls: type: array items: @@ -919,6 +1019,9 @@ components: refusal: type: string description: The refusal message generated by the model. + required: + - content + - role ChatCompletionMessageToolCallChunk: type: object properties: @@ -1051,106 +1154,25 @@ components: description: The object type, which is always `chat.completion.chunk`. usage: $ref: "#/components/schemas/CompletionUsage" + reasoning_format: + type: string + description: > + The format of the reasoning content. Can be `raw` or `parsed`. + + When specified as raw some reasoning models will output tags. + When specified as parsed the model will output the reasoning under reasoning_content. required: - choices - created - id - model - object - CreateCompletionResponse: - type: object - description: > - Represents a completion response from the API. Note: both the streamed - and non-streamed response objects share the same shape (unlike the chat - endpoint). - properties: - id: - type: string - description: A unique identifier for the completion. - choices: - type: array - description: - The list of completion choices the model generated for the input - prompt. - items: - type: object - required: - - finish_reason - - index - - logprobs - - text - properties: - finish_reason: - type: string - description: > - The reason the model stopped generating tokens. This will be - `stop` if the model hit a natural stop point or a provided - stop sequence, - - `length` if the maximum number of tokens specified in the - request was reached, - - or `content_filter` if content was omitted due to a flag from - our content filters. 
- enum: - - stop - - length - - content_filter - index: - type: integer - logprobs: - type: object - properties: - text_offset: - type: array - items: - type: integer - token_logprobs: - type: array - items: - type: number - tokens: - type: array - items: - type: string - top_logprobs: - type: array - items: - type: object - additionalProperties: - type: number - text: - type: string - created: - type: integer - description: The Unix timestamp (in seconds) of when the completion was created. - model: - type: string - description: The model used for completion. - object: - type: string - description: The object type, which is always "text_completion" - enum: - - text_completion - usage: - $ref: "#/components/schemas/CompletionUsage" - required: - - id - - object - - created - - model - - choices Config: x-config: sections: - general: title: "General settings" settings: - - name: application_name - env: "APPLICATION_NAME" - type: string - default: "inference-gateway" - description: "The name of the application" - name: environment env: "ENVIRONMENT" type: string @@ -1166,6 +1188,53 @@ components: type: bool default: "false" description: "Enable authentication" + - mcp: + title: "Model Context Protocol (MCP)" + settings: + - name: mcp_enable + env: "MCP_ENABLE" + type: bool + default: "false" + description: "Enable MCP" + - name: mcp_expose + env: "MCP_EXPOSE" + type: bool + default: "false" + description: "Expose MCP tools endpoint" + - name: mcp_servers + env: "MCP_SERVERS" + type: string + description: "List of MCP servers" + - name: mcp_client_timeout + env: "MCP_CLIENT_TIMEOUT" + type: time.Duration + default: "5s" + description: "MCP client HTTP timeout" + - name: mcp_dial_timeout + env: "MCP_DIAL_TIMEOUT" + type: time.Duration + default: "3s" + description: "MCP client dial timeout" + - name: mcp_tls_handshake_timeout + env: "MCP_TLS_HANDSHAKE_TIMEOUT" + type: time.Duration + default: "3s" + description: "MCP client TLS handshake timeout" + - name: mcp_response_header_timeout + env: "MCP_RESPONSE_HEADER_TIMEOUT" + type: time.Duration + default: "3s" + description: "MCP client response header timeout" + - name: mcp_expect_continue_timeout + env: "MCP_EXPECT_CONTINUE_TIMEOUT" + type: time.Duration + default: "1s" + description: "MCP client expect continue timeout" + - name: mcp_request_timeout + env: "MCP_REQUEST_TIMEOUT" + type: time.Duration + default: "5s" + description: "MCP client request timeout for initialize and tool calls" - oidc: title: "OpenID Connect" settings: @@ -1312,3 +1381,13 @@ components: type: string description: "OpenAI API Key" secret: true + - name: deepseek_api_url + env: "DEEPSEEK_API_URL" + type: string + default: "https://api.deepseek.com" + description: "DeepSeek API URL" + - name: deepseek_api_key + env: "DEEPSEEK_API_KEY" + type: string + description: "DeepSeek API Key" + secret: true diff --git a/src/lib.rs b/src/lib.rs index 8fb2aff..aa444cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,9 @@ pub enum GatewayError { #[error("Unauthorized: {0}")] Unauthorized(String), + #[error("Forbidden: {0}")] + Forbidden(String), + #[error("Bad request: {0}")] BadRequest(String), @@ -84,6 +87,29 @@ pub struct ListModelsResponse { pub data: Vec, } +/// An MCP tool definition +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MCPTool { + /// The name of the tool + pub name: String, + /// A description of what the tool does + pub description: String, + /// The MCP server that provides this tool + pub server: String, + /// JSON schema for the tool's input 
parameters (optional) + #[serde(skip_serializing_if = "Option::is_none")] + pub input_schema: Option, +} + +/// Response structure for listing MCP tools +#[derive(Debug, Serialize, Deserialize)] +pub struct ListToolsResponse { + /// Response object type, always "list" + pub object: String, + /// Array of available MCP tools + pub data: Vec, +} + /// Supported LLM providers #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)] #[serde(rename_all = "lowercase")] @@ -100,6 +126,8 @@ pub enum Provider { Cohere, #[serde(alias = "Anthropic", alias = "ANTHROPIC")] Anthropic, + #[serde(alias = "Deepseek", alias = "DEEPSEEK")] + Deepseek, } impl fmt::Display for Provider { @@ -111,6 +139,7 @@ impl fmt::Display for Provider { Provider::Cloudflare => write!(f, "cloudflare"), Provider::Cohere => write!(f, "cohere"), Provider::Anthropic => write!(f, "anthropic"), + Provider::Deepseek => write!(f, "deepseek"), } } } @@ -126,6 +155,7 @@ impl TryFrom<&str> for Provider { "cloudflare" => Ok(Self::Cloudflare), "cohere" => Ok(Self::Cohere), "anthropic" => Ok(Self::Anthropic), + "deepseek" => Ok(Self::Deepseek), _ => Err(GatewayError::BadRequest(format!("Unknown provider: {}", s))), } } @@ -422,6 +452,18 @@ pub trait InferenceGatewayAPI { messages: Vec, ) -> impl Stream> + Send; + /// Lists available MCP tools + /// + /// # Errors + /// - Returns [`GatewayError::Unauthorized`] if authentication fails + /// - Returns [`GatewayError::BadRequest`] if the request is malformed + /// - Returns [`GatewayError::InternalError`] if the server has an error + /// - Returns [`GatewayError::Other`] for other errors + /// + /// # Returns + /// A list of available MCP tools. Only accessible when EXPOSE_MCP is enabled. + fn list_tools(&self) -> impl Future> + Send; + /// Checks if the API is available fn health_check(&self) -> impl Future> + Send; } @@ -663,6 +705,42 @@ impl InferenceGatewayAPI for InferenceGatewayClient { } } + async fn list_tools(&self) -> Result { + let url = format!("{}/mcp/tools", self.base_url); + let mut request = self.client.get(&url); + if let Some(token) = &self.token { + request = request.bearer_auth(token); + } + + let response = request.send().await?; + match response.status() { + StatusCode::OK => { + let json_response: ListToolsResponse = response.json().await?; + Ok(json_response) + } + StatusCode::UNAUTHORIZED => { + let error: ErrorResponse = response.json().await?; + Err(GatewayError::Unauthorized(error.error)) + } + StatusCode::BAD_REQUEST => { + let error: ErrorResponse = response.json().await?; + Err(GatewayError::BadRequest(error.error)) + } + StatusCode::FORBIDDEN => { + let error: ErrorResponse = response.json().await?; + Err(GatewayError::Forbidden(error.error)) + } + StatusCode::INTERNAL_SERVER_ERROR => { + let error: ErrorResponse = response.json().await?; + Err(GatewayError::InternalError(error.error)) + } + _ => Err(GatewayError::Other(Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Unexpected status code: {}", response.status()), + )))), + } + } + async fn health_check(&self) -> Result { let url = format!("{}/health", self.base_url); @@ -694,6 +772,7 @@ mod tests { (Provider::Cloudflare, "cloudflare"), (Provider::Cohere, "cohere"), (Provider::Anthropic, "anthropic"), + (Provider::Deepseek, "deepseek"), ]; for (provider, expected) in providers { @@ -711,6 +790,7 @@ mod tests { ("\"cloudflare\"", Provider::Cloudflare), ("\"cohere\"", Provider::Cohere), ("\"anthropic\"", Provider::Anthropic), + ("\"deepseek\"", Provider::Deepseek), ]; for (json, 
expected) in test_cases { @@ -763,6 +843,7 @@ mod tests { (Provider::Cloudflare, "cloudflare"), (Provider::Cohere, "cohere"), (Provider::Anthropic, "anthropic"), + (Provider::Deepseek, "deepseek"), ]; for (provider, expected) in providers { @@ -1699,4 +1780,127 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_list_tools() -> Result<(), GatewayError> { + let mut server = Server::new_async().await; + + let raw_response_json = r#"{ + "object": "list", + "data": [ + { + "name": "read_file", + "description": "Read content from a file", + "server": "http://mcp-filesystem-server:8083/mcp", + "input_schema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Path to the file to read" + } + }, + "required": ["file_path"] + } + }, + { + "name": "write_file", + "description": "Write content to a file", + "server": "http://mcp-filesystem-server:8083/mcp" + } + ] + }"#; + + let mock = server + .mock("GET", "/v1/mcp/tools") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(raw_response_json) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url); + let response = client.list_tools().await?; + + assert_eq!(response.object, "list"); + assert_eq!(response.data.len(), 2); + + // Test first tool with input_schema + assert_eq!(response.data[0].name, "read_file"); + assert_eq!(response.data[0].description, "Read content from a file"); + assert_eq!( + response.data[0].server, + "http://mcp-filesystem-server:8083/mcp" + ); + assert!(response.data[0].input_schema.is_some()); + + // Test second tool without input_schema + assert_eq!(response.data[1].name, "write_file"); + assert_eq!(response.data[1].description, "Write content to a file"); + assert_eq!( + response.data[1].server, + "http://mcp-filesystem-server:8083/mcp" + ); + assert!(response.data[1].input_schema.is_none()); + + mock.assert(); + Ok(()) + } + + #[tokio::test] + async fn test_list_tools_with_authentication() -> Result<(), GatewayError> { + let mut server = Server::new_async().await; + + let raw_response_json = r#"{ + "object": "list", + "data": [] + }"#; + + let mock = server + .mock("GET", "/v1/mcp/tools") + .match_header("authorization", "Bearer test-token") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(raw_response_json) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url).with_token("test-token"); + let response = client.list_tools().await?; + + assert_eq!(response.object, "list"); + assert_eq!(response.data.len(), 0); + mock.assert(); + Ok(()) + } + + #[tokio::test] + async fn test_list_tools_mcp_not_exposed() -> Result<(), GatewayError> { + let mut server = Server::new_async().await; + + let mock = server + .mock("GET", "/v1/mcp/tools") + .with_status(403) + .with_header("content-type", "application/json") + .with_body( + r#"{"error":"MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable."}"#, + ) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url); + + match client.list_tools().await { + Err(GatewayError::Forbidden(msg)) => { + assert_eq!( + msg, + "MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable." + ); + } + _ => panic!("Expected Forbidden error for MCP not exposed"), + } + + mock.assert(); + Ok(()) + } }
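Since this change extends `Provider` with `Deepseek` and updates both the `Display` and `TryFrom<&str>` implementations, a short round-trip sketch of the behaviour those impls (and the tests above) establish:

```rust
use inference_gateway_sdk::{GatewayError, Provider};
use std::convert::TryFrom;

fn main() -> Result<(), GatewayError> {
    // TryFrom<&str> accepts the lowercase names used on the wire...
    let provider = Provider::try_from("deepseek")?;
    assert_eq!(provider, Provider::Deepseek);

    // ...and Display renders the same lowercase form back.
    assert_eq!(provider.to_string(), "deepseek");

    // Unknown names surface as GatewayError::BadRequest rather than panicking.
    assert!(Provider::try_from("not-a-provider").is_err());
    Ok(())
}
```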