Skip to content

Commit df0f10e

Browse files
committed
[owl] Bug fixes and updates (#849)
Backend - owl (API server)
- Bug fixes:
  - Always update reasoning content in state
  - Only record reasoning time if there's reasoning content
  - Anthropic 4.1 and 4.5 models cannot specify both `temperature` and `top_p`
  - Delete org secret upon org deletion
- Bump dependencies
- Deps: Pin Pydantic to `2.11.x`
- Test-LLM: Support reasoning content
1 parent 6f5adff commit df0f10e

File tree

12 files changed

+241
-128
lines changed

12 files changed

+241
-128
lines changed

.github/workflows/ci.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,11 @@ jobs:
247247
timeout-minutes: 1
248248
run: mkdir -p logs && docker compose -p jm -f docker/compose.ci.yml logs starling > logs/starling.log
249249

250+
- name: Inspect test-llm logs
251+
timeout-minutes: 1
252+
if: always() && steps.launch_services.outcome == 'success'
253+
run: mkdir -p logs && docker compose -p jm -f docker/compose.ci.yml logs test-llm > logs/test-llm.log
254+
250255
- name: Test Stripe integration (Cloud only)
251256
id: test_stripe
252257
if: matrix.jamai-mode == 'cloud' && matrix.test-group == 'group1' && steps.launch_services.outcome == 'success'

clients/python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ dependencies = [
117117
"Pillow>=10.0",
118118
"pydantic-extra-types~=2.9",
119119
"pydantic-settings~=2.4",
120-
"pydantic[email,timezone]~=2.10",
120+
"pydantic[email,timezone]~=2.11",
121121
"pyyaml~=6.0",
122122
"toml~=0.10.2",
123123
"typing_extensions~=4.10",

services/api/pyproject.toml

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -111,15 +111,15 @@ classifiers = [ # https://pypi.org/classifiers/
111111
# Sort your dependencies https://sortmylist.com/
112112
# In general, for v1 and above, we pin to minor version using ~=
113113
dependencies = [
114-
"aioboto3~=7.0.0",
115-
"aiobotocore~=2.21.0", # Took long time to resolve
116-
"aiofiles~=24.1.0",
114+
"aioboto3~=15.5.0",
115+
"aiobotocore==2.25.1", # Took long time to resolve
116+
"aiofiles~=25.1.0",
117117
"aiosqlite~=0.21.0",
118118
"async-lru~=2.0.0",
119119
"asyncpg~=0.30.0",
120120
"authlib~=1.6.0",
121121
"bm25s~=0.2.0",
122-
"boto3==1.37.1", # Took long time to resolve
122+
"boto3==1.40.61", # Took long time to resolve
123123
"celery~=5.5.0",
124124
"clickhouse-connect~=0.8.0",
125125
"cloudevents~=1.12.0",
@@ -142,7 +142,7 @@ dependencies = [
142142
"natsort[fast]~=8.4.0",
143143
"nltk~=3.9.0",
144144
"numpy>=1.26.0",
145-
"openai~=1.99.0",
145+
"openai~=2.9.0",
146146
"opentelemetry-api~=1.36.0",
147147
"opentelemetry-distro~=0.57b0",
148148
"opentelemetry-exporter-otlp~=1.36.0",
@@ -187,14 +187,14 @@ dependencies = [
187187
"pycryptodomex~=3.23.0",
188188
"pydantic-extra-types~=2.10.0",
189189
"pydantic-settings~=2.10.0",
190-
"pydantic[email,timezone]~=2.11.0",
190+
"pydantic[email,timezone]~=2.11.0", # 2.12 causes issues with sqlmodel Datetime
191191
"pydub~=0.25.0",
192192
"pyjwt~=2.10.0",
193193
"pylance==0.16.0",
194194
"python-multipart~=0.0.20",
195195
"redis[hiredis]~=5.3.0",
196196
"SQLAlchemy~=2.0.0",
197-
"sqlmodel~=0.0.20",
197+
"sqlmodel~=0.0.27",
198198
"sqlparse~=0.5.0",
199199
"starlette~=0.41.0",
200200
"stripe~=9.12.0",

services/api/src/owl/db/gen_executor.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -748,7 +748,7 @@ async def _execute_chat_task(self, task: Task, q: Queue[ResultT | None]) -> None
748748
):
749749
reasoning += chunk.reasoning_content
750750
result += chunk.content
751-
if chunk.content and reasoning_time is None:
751+
if chunk.content and reasoning and reasoning_time is None:
752752
reasoning_time = perf_counter() - t0
753753
# if chunk.content is None and chunk.usage is None:
754754
# continue
@@ -822,12 +822,10 @@ async def _execute_chat_task(self, task: Task, q: Queue[ResultT | None]) -> None
822822
await q.put(None)
823823
state_col = f"{task.output_column_name}_"
824824
state = self._column_dict.get(state_col, {})
825-
if references is not None:
826-
state["references"] = references.model_dump(mode="json")
827-
if reasoning:
828-
state["reasoning_content"] = reasoning
829-
if reasoning_time is not None:
830-
state["reasoning_time"] = reasoning_time
825+
# Always update state
826+
state["references"] = references.model_dump(mode="json") if references else None
827+
state["reasoning_content"] = reasoning if reasoning else None
828+
state["reasoning_time"] = reasoning_time if reasoning_time else None
831829
self._column_dict[state_col] = state
832830
await self._signal_task_completion(task, result)
833831
self.log(f'Streamed completion for column "{output_column}": <{mask_string(result)}>.')

services/api/src/owl/entrypoints/llm.py

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -234,6 +234,7 @@ async def chat_completion(body: ChatCompletionRequest):
234234
model_spec = _parse_chat_model_id(body.model)
235235
num_input_tokens = len(" ".join(m.text_content for m in body.messages).split(" "))
236236
user_messages = [m for m in body.messages if m.role == ChatRole.USER]
237+
reasoning = body.reasoning_effort not in (None, "disable", "minimal", "none")
237238

238239
# Test context length error handling
239240
if num_input_tokens > model_spec.max_context_length:
@@ -348,15 +349,16 @@ async def stream_response():
348349
for i in range(body.n):
349350
if model_spec.tpot_ms > 0:
350351
await sleep(model_spec.tpot_ms / 1000)
352+
if reasoning and t < 2:
353+
delta = ChatCompletionDelta(reasoning_content=content)
354+
else:
355+
delta = ChatCompletionDelta(content=content)
351356
chunk = ChatCompletionChunkResponse(
352357
id=body.id,
353358
model=model_spec.id,
354359
choices=[
355360
ChatCompletionChoice(
356-
index=i,
357-
delta=ChatCompletionDelta(content=content),
358-
logprobs=None,
359-
finish_reason=None,
361+
index=i, delta=delta, logprobs=None, finish_reason=None
360362
)
361363
],
362364
usage=None,
@@ -417,20 +419,25 @@ async def stream_response():
417419
# Non-stream
418420
if (model_spec.ttft_ms + model_spec.tpot_ms) > 0:
419421
await sleep((model_spec.ttft_ms + model_spec.tpot_ms * len(completion_tokens)) / 1000)
422+
contents = []
423+
reasoning_contents = []
424+
for t in range(num_completion_tokens):
425+
if reasoning and t < 2:
426+
reasoning_contents.append(completion_tokens[t % len(completion_tokens)])
427+
else:
428+
contents.append(completion_tokens[t % len(completion_tokens)])
420429
response = ChatCompletionResponse(
421430
id=body.id,
422431
model=model_spec.id,
423432
choices=[
424433
ChatCompletionChoice(
425434
index=i,
426435
message=ChatCompletionMessage(
427-
content=" ".join(
428-
completion_tokens[t % len(completion_tokens)]
429-
for t in range(num_completion_tokens)
430-
)
436+
content=" ".join(contents),
437+
reasoning_content=" ".join(reasoning_contents) if reasoning_contents else None,
431438
),
432439
logprobs=None,
433-
finish_reason="length",
440+
finish_reason="length" if num_completion_tokens == body.max_tokens else "stop",
434441
)
435442
for i in range(body.n)
436443
],

services/api/src/owl/routers/organizations/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
router.include_router(oss_router)
99
router.include_router(secrets_router)
1010

11+
1112
if ENV_CONFIG.is_cloud:
1213
from owl.routers.organizations.cloud import router as cloud_router
1314

services/api/src/owl/routers/organizations/oss.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
PricePlan,
1818
Project,
1919
ProjectMember,
20+
Secret,
2021
User,
2122
)
2223
from owl.types import (
@@ -316,6 +317,7 @@ async def delete_organization(
316317
# Delete related resources
317318
await session.exec(delete(Organization).where(Organization.id == organization_id))
318319
await session.exec(delete(Project).where(Project.organization_id == organization_id))
320+
await session.exec(delete(Secret).where(Secret.organization_id == organization_id))
319321
if ENV_CONFIG.is_cloud:
320322
from owl.db.models.cloud import VerificationCode
321323

services/api/src/owl/routers/organizations/secrets.py

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,17 @@
2424
ResourceNotFoundError,
2525
handle_exception,
2626
)
27+
from owl.utils.mcp import MCP_TOOL_TAG
2728

2829
router = APIRouter()
2930

3031

31-
@router.post("/v2/secrets")
32+
@router.post(
33+
"/v2/secrets",
34+
summary="Create an organization secret.",
35+
description="Permissions: `organization.ADMIN`.",
36+
tags=[MCP_TOOL_TAG, "organization.ADMIN"],
37+
)
3238
@handle_exception
3339
async def create_secret(
3440
request: Request,
@@ -80,9 +86,7 @@ async def create_secret(
8086
existing_projects = (await session.exec(statement)).all()
8187
if len(existing_projects) != len(body.allowed_projects):
8288
non_exist_projects = set(body.allowed_projects) - set(existing_projects)
83-
raise BadInputError(
84-
f"Non-existing projects are not allowed: '{"', '".join(non_exist_projects)}'."
85-
)
89+
raise ResourceNotFoundError(f"Projects not found: {', '.join(non_exist_projects)}")
8690

8791
# Create new secret
8892
secret = Secret(
@@ -103,8 +107,9 @@ async def create_secret(
103107

104108
@router.get(
105109
"/v2/secrets/list",
106-
summary="List system-wide secrets.",
110+
summary="List organization secrets.",
107111
description="Permissions: `organization.MEMBER`.",
112+
tags=[MCP_TOOL_TAG, "organization.MEMBER"],
108113
)
109114
@handle_exception
110115
async def list_secrets(
@@ -158,8 +163,9 @@ async def list_secrets(
158163

159164
@router.get(
160165
"/v2/secrets",
161-
summary="Get a secret.",
166+
summary="Get an organization secret.",
162167
description="Permissions: `organization.MEMBER`.",
168+
tags=[MCP_TOOL_TAG, "organization.MEMBER"],
163169
)
164170
@handle_exception
165171
async def get_secret(
@@ -195,11 +201,15 @@ async def get_secret(
195201
secret = await session.get(Secret, (organization_id, normalized_name))
196202
if secret is None:
197203
raise ResourceNotFoundError(f'Secret "{normalized_name}" is not found.')
198-
199204
return secret.to_read_masked()
200205

201206

202-
@router.patch("/v2/secrets")
207+
@router.patch(
208+
"/v2/secrets",
209+
summary="Update an organization secret.",
210+
description="Permissions: `organization.ADMIN`.",
211+
tags=[MCP_TOOL_TAG, "organization.ADMIN"],
212+
)
203213
@handle_exception
204214
async def update_secret(
205215
request: Request,
@@ -256,9 +266,7 @@ async def update_secret(
256266
existing_projects = (await session.exec(statement)).all()
257267
if len(existing_projects) != len(body.allowed_projects):
258268
non_exist_projects = set(body.allowed_projects) - set(existing_projects)
259-
raise BadInputError(
260-
f"Non-existing projects are not allowed: '{"', '".join(non_exist_projects)}'."
261-
)
269+
raise ResourceNotFoundError(f"Projects not found: {', '.join(non_exist_projects)}")
262270
secret, updates = await Secret.update(
263271
session, (organization_id, normalized_name), body, name="Secret"
264272
)
@@ -272,8 +280,9 @@ async def update_secret(
272280

273281
@router.delete(
274282
"/v2/secrets",
275-
summary="Delete a secret.",
283+
summary="Delete an organization secret.",
276284
description="Permissions: `organization.ADMIN`.",
285+
tags=[MCP_TOOL_TAG, "organization.ADMIN"],
277286
)
278287
@handle_exception
279288
async def delete_secret(

services/api/src/owl/utils/lm.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -498,8 +498,8 @@ def _prepare_hyperparams(
498498

499499
# Anthropic specific
500500
if ctx.inference_provider == CloudProvider.ANTHROPIC:
501-
# Sonnet 4.5 cannot specify both `temperature` and `top_p`
502-
if "sonnet-4-5" in ctx.routing_id:
501+
# 4.1 and 4.5 models cannot specify both `temperature` and `top_p`
502+
if "-4-1" in ctx.routing_id or "-4-5" in ctx.routing_id:
503503
t = hyperparams.get("temperature", None)
504504
p = hyperparams.get("top_p", None)
505505
if t is not None and p is not None:
@@ -587,6 +587,7 @@ def _prepare_hyperparams(
587587
reasoning_effort = "high"
588588
if ctx.inference_provider == CloudProvider.ELLM:
589589
hyperparams["reasoning_effort"] = reasoning_effort
590+
hyperparams["allowed_openai_params"] = ["reasoning_effort"]
590591
return
591592
elif ctx.inference_provider in [CloudProvider.GEMINI, CloudProvider.ANTHROPIC]:
592593
# Gemini 3-Pro recommends reasoning_effort
@@ -604,8 +605,10 @@ def _prepare_hyperparams(
604605
else:
605606
thinking_budget = 8192
606607
if ctx.inference_provider == CloudProvider.ANTHROPIC:
607-
hyperparams["temperature"] = 1
608-
hyperparams["top_p"] = min(max(0.95, hyperparams.pop("top_p", 1.0)), 1.0)
608+
if "temperature" in hyperparams:
609+
hyperparams["temperature"] = 1
610+
if "top_p" in hyperparams:
611+
hyperparams["top_p"] = min(max(0.95, hyperparams.pop("top_p", 1.0)), 1.0)
609612
thinking_budget = max(thinking_budget, 1024)
610613
hyperparams["thinking"] = {
611614
"type": "enabled",

services/api/src/owl/utils/test.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -401,6 +401,7 @@ def setup_projects():
401401
type=ModelType.LLM,
402402
capabilities=[
403403
ModelCapability.CHAT,
404+
ModelCapability.REASONING,
404405
ModelCapability.IMAGE,
405406
ModelCapability.AUDIO,
406407
],
@@ -483,7 +484,7 @@ def setup_projects():
483484
ELLM_DESCRIBE_DEPLOYMENT = DeploymentCreate(
484485
model_id=ELLM_DESCRIBE_CONFIG.id,
485486
name=f"{ELLM_DESCRIBE_CONFIG.name} Deployment",
486-
provider="custom",
487+
provider=CloudProvider.ELLM,
487488
routing_id=ELLM_DESCRIBE_CONFIG.id,
488489
api_base=ENV_CONFIG.test_llm_api_base,
489490
)

0 commit comments

Comments
 (0)