|
| 1 | +"""Anthropic (Claude) backend for the assistant (native Messages API). |
| 2 | +
|
| 3 | +Claude is NOT OpenAI-compatible: it uses ``POST /v1/messages`` with an |
| 4 | +``x-api-key`` header, a top-level ``system`` parameter, and a ``content`` block |
| 5 | +list in the response. It therefore needs its own client rather than the shared |
| 6 | +``OpenAICompatibleClient``. |
| 7 | +
|
| 8 | +Two operations are implemented: a chat completion constrained to a JSON Schema |
| 9 | +for tool routing (done with forced tool use, the reliable structured-output |
| 10 | +path on Claude) and a plain-text chat completion for the final summary |
| 11 | +sentence. Embeddings are not provided: Anthropic has no embeddings endpoint, |
| 12 | +so semantic search must use another provider (see ``embed``). |
| 13 | +
|
| 14 | +Only the calling user's question and the compact, identifier-stripped record |
| 15 | +fields produced by the read-only catalog tools leave the platform; ids and |
| 16 | +UUIDs are scrubbed before the summary call (see ``engine._strip_identifiers``). |
| 17 | +""" |
| 18 | + |
| 19 | +import logging |
| 20 | + |
| 21 | +import httpx |
| 22 | +from django.conf import settings |
| 23 | + |
| 24 | +from assistant.providers.base import ( |
| 25 | + BaseClient, |
| 26 | + MalformedModelOutput, |
| 27 | + ModelNotAvailable, |
| 28 | + ServiceUnreachable, |
| 29 | +) |
| 30 | + |
| 31 | +logger = logging.getLogger(__name__) |
| 32 | + |
| 33 | + |
class AnthropicClient(BaseClient):
    """Client for the Anthropic Messages API (``POST {base_url}/messages``).

    Provides ``chat_json`` (structured output through forced tool use),
    ``chat_text`` (plain-text completion) and ``embed`` (always raises, since
    this provider has no embeddings support).

    Failures are reported with the exception types imported from
    ``assistant.providers.base``: ``ServiceUnreachable`` for transport,
    authentication and generic HTTP errors, ``ModelNotAvailable`` for HTTP
    404, and ``MalformedModelOutput`` when the response body does not have
    the expected shape.
    """

    PROVIDER_LABEL = "Claude"
    # Applied when neither the constructor argument nor
    # ``settings.AI_ASSISTANT_BASE_URL`` is set. The Messages endpoint is
    # ``{base_url}/messages`` (so the default resolves to
    # ``https://api.anthropic.com/v1/messages``).
    DEFAULT_BASE_URL = "https://api.anthropic.com/v1"
    # Pinned API version sent on every request (Anthropic requirement).
    ANTHROPIC_VERSION = "2023-06-01"
    # Name of the synthetic tool used to force structured routing output.
    PLAN_TOOL_NAME = "plan"

    def __init__(self, base_url=None, model=None, api_key=None):
        """Resolve connection settings, preferring explicit arguments.

        Args:
            base_url: API root. Falls back to ``settings.AI_ASSISTANT_BASE_URL``
                and then ``DEFAULT_BASE_URL``; trailing slashes are removed.
            model: Model identifier. Falls back to
                ``settings.AI_ASSISTANT_MODEL``.
            api_key: API key. Only ``None`` falls back to
                ``settings.AI_ASSISTANT_API_KEY``; an explicit empty string
                is kept, which makes ``_headers`` raise on the first request.
        """
        self.base_url = (
            base_url or settings.AI_ASSISTANT_BASE_URL or self.DEFAULT_BASE_URL
        ).rstrip("/")
        self.model = model or settings.AI_ASSISTANT_MODEL
        self.api_key = api_key if api_key is not None else settings.AI_ASSISTANT_API_KEY
        self.timeout = httpx.Timeout(
            settings.AI_ASSISTANT_TIMEOUT,
            connect=settings.AI_ASSISTANT_CONNECT_TIMEOUT,
        )

    def _headers(self):
        """Return the request headers.

        Raises:
            ServiceUnreachable: If no API key is configured.
        """
        if not self.api_key:
            raise ServiceUnreachable(
                f"{self.PROVIDER_LABEL} API key is not configured "
                "(set AI_ASSISTANT_API_KEY)."
            )
        return {
            "x-api-key": self.api_key,
            "anthropic-version": self.ANTHROPIC_VERSION,
            "content-type": "application/json",
        }

    @staticmethod
    def _split_system(messages):
        """Split OpenAI-style messages into Claude's (system, messages) shape.

        Claude takes the system prompt as a top-level parameter, not as a
        message with ``role: "system"``; user/assistant turns stay in
        ``messages``.

        Args:
            messages: Iterable of dicts with ``role`` and ``content`` keys.

        Returns:
            A ``(system, chat)`` tuple. ``system`` is the non-empty system
            contents joined by a blank line (``""`` when there are none);
            ``chat`` is the list of remaining messages, in order, reduced to
            their ``role`` and ``content`` keys.
        """
        system_parts = []
        chat = []
        for message in messages:
            role = message.get("role")
            content = message.get("content", "")
            if role == "system":
                if content:
                    system_parts.append(content)
            else:
                chat.append({"role": role, "content": content})
        return "\n\n".join(system_parts), chat

    def _base_payload(self, messages):
        """Build the request body shared by ``chat_json`` and ``chat_text``.

        The ``system`` key is only included when a system prompt is present.
        """
        system, chat = self._split_system(messages)
        # No temperature / thinking: both are rejected (HTTP 400) on the
        # current Opus family, which is the default model.
        payload = {
            "model": self.model,
            "max_tokens": settings.AI_ASSISTANT_MAX_TOKENS,
            "messages": chat,
        }
        if system:
            payload["system"] = system
        return payload

    def _post(self, payload):
        """POST ``payload`` to the Messages endpoint and return the response.

        The HTTP status is not inspected here; see ``_raise_for_status``.

        Raises:
            ServiceUnreachable: If the API key is missing or the request
                fails at the transport level.
        """
        # Resolved outside the ``try`` so a missing-key error propagates
        # unchanged instead of passing through the httpx handler.
        headers = self._headers()
        try:
            return httpx.post(
                f"{self.base_url}/messages",
                json=payload,
                headers=headers,
                timeout=self.timeout,
            )
        except httpx.HTTPError as exc:
            # ``httpx.HTTPError`` is the base class of ``ConnectError`` and
            # ``TimeoutException``, so one handler covers them all.
            raise ServiceUnreachable(str(exc)) from exc

    def _raise_for_status(self, resp):
        """Translate an HTTP error status into a provider exception.

        Returns ``None`` for any status below 400.

        Raises:
            ServiceUnreachable: On 401/403 (with a generic message) and on
                any other status >= 400 (with the status code and the first
                200 characters of the body).
            ModelNotAvailable: On 404, carrying the configured model name.
        """
        if resp.status_code in (401, 403):
            # Never surface the key or auth detail to the caller.
            logger.error(
                "%s authentication failed (HTTP %s)",
                self.PROVIDER_LABEL,
                resp.status_code,
            )
            raise ServiceUnreachable("authentication failed")
        if resp.status_code == 404:
            raise ModelNotAvailable(self.model)
        if resp.status_code >= 400:
            raise ServiceUnreachable(f"HTTP {resp.status_code}: {resp.text[:200]}")

    def _content_blocks(self, resp):
        """Return the response's ``content`` list, validated as a list of dicts.

        Raises:
            MalformedModelOutput: If the body is not JSON, has no ``content``
                key, or ``content`` is not a list made only of objects. The
                message is the first 200 characters of the body.
        """
        try:
            blocks = resp.json()["content"]
        except (KeyError, TypeError, ValueError) as exc:
            raise MalformedModelOutput(resp.text[:200]) from exc
        if not isinstance(blocks, list):
            raise MalformedModelOutput(resp.text[:200])
        # Callers use ``block.get(...)``; reject non-object entries here so
        # they fail as MalformedModelOutput rather than AttributeError.
        if not all(isinstance(block, dict) for block in blocks):
            raise MalformedModelOutput(resp.text[:200])
        return blocks

    def chat_json(self, messages, json_schema, think=None):
        """Chat completion constrained to ``json_schema``; returns the parsed object.

        Uses forced tool use: a single ``plan`` tool whose ``input_schema`` is
        the routing schema, with ``tool_choice`` pinned to it. The model must
        emit a ``tool_use`` block whose ``input`` is the structured plan. The
        plan schema keeps a free-form ``arguments`` object, which Claude tool
        input schemas accept; server-side validation in the engine is the real
        safety net.

        Args:
            messages: OpenAI-style message dicts (see ``_split_system``).
            json_schema: JSON Schema sent as the tool's ``input_schema``.
            think: Never read by this method; presumably kept so the
                signature matches other providers (confirm against
                ``BaseClient``).

        Returns:
            The ``input`` dict of the first ``tool_use`` block named
            ``PLAN_TOOL_NAME``.

        Raises:
            ServiceUnreachable, ModelNotAvailable: See ``_post`` and
                ``_raise_for_status``.
            MalformedModelOutput: If no matching ``tool_use`` block is
                present or its ``input`` is not a dict.
        """
        payload = self._base_payload(messages)
        payload["tools"] = [
            {
                "name": self.PLAN_TOOL_NAME,
                "description": "Return the execution plan for the question.",
                "input_schema": json_schema,
            }
        ]
        payload["tool_choice"] = {"type": "tool", "name": self.PLAN_TOOL_NAME}
        resp = self._post(payload)
        self._raise_for_status(resp)
        for block in self._content_blocks(resp):
            if block.get("type") == "tool_use" and block.get("name") == self.PLAN_TOOL_NAME:
                parsed = block.get("input")
                if not isinstance(parsed, dict):
                    raise MalformedModelOutput(str(parsed)[:200])
                return parsed
        raise MalformedModelOutput(resp.text[:200])

    def chat_text(self, messages):
        """Plain-text chat completion.

        Returns:
            The ``text`` of every ``text`` content block, concatenated in
            order and stripped of surrounding whitespace. Returns ``""``
            when the response has no text blocks.

        Raises:
            ServiceUnreachable, ModelNotAvailable, MalformedModelOutput: See
                ``_post``, ``_raise_for_status`` and ``_content_blocks``.
        """
        resp = self._post(self._base_payload(messages))
        self._raise_for_status(resp)
        parts = []
        for block in self._content_blocks(resp):
            if block.get("type") != "text":
                continue
            text = block.get("text")
            # A missing, null or non-string ``text`` contributes nothing
            # instead of breaking the join with a TypeError.
            if isinstance(text, str):
                parts.append(text)
        return "".join(parts).strip()

    def embed(self, texts):
        """Embeddings are unsupported: Anthropic has no embeddings endpoint.

        Raises:
            ServiceUnreachable: Always; ``texts`` is not used.
        """
        raise ServiceUnreachable(
            "The Claude provider does not support embeddings (Anthropic has no "
            "embeddings API). Disable AI_ASSISTANT_SEMANTIC_ENABLED, or set "
            "AI_ASSISTANT_PROVIDER to a provider with embeddings for indexing."
        )
0 commit comments