docs/source/guide/prompts_keys.md (+11 −2)
@@ -102,7 +102,7 @@ You can find all this information in the **Details** section of the deployment i
You can use your own self-hosted and fine-tuned model as long as it meets the following criteria:
- * Your server must provide [JSON mode](https://python.useinstructor.com/concepts/patching/#json-mode) for the LLM.
+ * Your server must provide [JSON mode](https://js.useinstructor.com/concepts/patching/#json-schema-mode) for the LLM. Specifically, the API must accept `response_format` with `type: json_object` and a `schema` field containing a valid JSON schema: `{"response_format": {"type": "json_object", "schema": <schema>}}`
* The server API must follow [OpenAI format](https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format).
Examples of compatible LLMs include [Ollama](https://ollama.com/) and [sglang](https://github.com/sgl-project/sglang?tab=readme-ov-file#openai-compatible-api).
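To make the required request shape concrete, a call to a compatible server could look like the sketch below; the endpoint URL, model name, and schema are illustrative placeholders.

```bash
# Illustrative sketch of the expected request shape; the endpoint, model name,
# and schema below are placeholders.
curl https://my.openai.endpoint.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-model",
    "messages": [{"role": "user", "content": "Classify the sentiment of this review: I love it!"}],
    "response_format": {
      "type": "json_object",
      "schema": {
        "type": "object",
        "properties": {"sentiment": {"type": "string"}},
        "required": ["sentiment"]
      }
    }
  }'
```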
@@ -114,7 +114,7 @@ To add a custom model, enter the following:
* An API key to access the model. An API key is tied to a specific account, but once added, access is shared within the org. (Optional)
* An auth token to access the model API. An auth token provides API access at the server level. (Optional)
- ### Example
+ ### Example with Ollama
1. Set up [Ollama](https://ollama.com/), e.g. `ollama run llama3.2`
2. [Verify your local OpenAI-compatible API is working](https://ollama.com/blog/openai-compatibility), e.g. `http://localhost:11434/v1` (a sample request is sketched below)
@@ -124,3 +124,12 @@ To add a custom model, enter the following:
- Endpoint: `https://my.openai.endpoint.com/v1` (note `v1` suffix is required)
- API key: `ollama` (default)
- Auth token: empty
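As a quick check for step 2, a minimal request against the local endpoint might look like this (assumes the `llama3.2` model pulled in step 1):

```bash
# Minimal sanity check against Ollama's local OpenAI-compatible API
# (assumes `ollama run llama3.2` from step 1).
curl http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3.2",
    "messages": [{"role": "user", "content": "Say hello"}]
  }'
```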
+
+
+ ### Example with Hugging Face Inference Endpoints
+ 1. Use the [DeepSeek model](https://huggingface.co/deepseek-ai/DeepSeek-R1)
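If the Inference Endpoint exposes the OpenAI-compatible `/v1` route, a connection check might look like the following sketch; the endpoint URL and token are placeholders for your own deployment.

```bash
# Sketch only: replace the endpoint URL and HF_TOKEN with your own
# Inference Endpoint details; assumes an OpenAI-compatible /v1 route.
curl https://<your-endpoint>.endpoints.huggingface.cloud/v1/chat/completions \
  -H "Authorization: Bearer $HF_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "deepseek-ai/DeepSeek-R1",
    "messages": [{"role": "user", "content": "Say hello"}]
  }'
```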