Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6baa994
fix: video gen exclude edit_file
Feb 6, 2026
9c8205c
Merge branch 'main' of https://github.com/modelscope/ms-agent
Feb 9, 2026
94b833f
Merge branch 'main' of https://github.com/modelscope/ms-agent
Mar 5, 2026
11bcf06
enhance deep research v2
Mar 11, 2026
dd3f185
Merge branch 'main' of https://github.com/modelscope/ms-agent
Mar 12, 2026
05cb676
refactor: optimize architecture, restrict researcher report edits, up…
Mar 15, 2026
1dd49b6
fix local code executor; refine workflow and prompt (03)
Mar 17, 2026
c27347f
fix timeout; support for running subagent in process; support for pos…
Mar 19, 2026
5920dc4
refine readme for deep research; add run_benchmark.sh; fix counting c…
Mar 20, 2026
14cb873
Merge branch 'main' of https://github.com/modelscope/ms-agent into fe…
Mar 20, 2026
986b34f
enrich researcher's reflection strategy to enhance stability
Mar 23, 2026
e6ad3e9
Merge branch 'main' of https://github.com/modelscope/ms-agent
Mar 24, 2026
b437bdc
thinking support beta; search_file_content fix
Mar 25, 2026
86a3ba8
support API key pool construction; support for reasoning model
Mar 27, 2026
8a32ce5
fix lint
Mar 27, 2026
393f23c
support for vertex type anthropic llm; refine reasoning output
Mar 27, 2026
49919ab
Merge branch 'main' of https://github.com/modelscope/ms-agent
Mar 31, 2026
f08a15e
support for response api; optimize log output formatting
Apr 2, 2026
5825111
Merge branch 'main' of https://github.com/modelscope/ms-agent into fe…
Apr 9, 2026
6a1ba9f
fix lint
Apr 9, 2026
c6b1e3b
Merge branch 'main' of https://github.com/modelscope/ms-agent
Apr 10, 2026
2f2dbb0
Merge branch 'main' of https://github.com/modelscope/ms-agent
Apr 13, 2026
9841444
Merge branch 'feat/dr_reasoning' of https://github.com/alcholiclg/ms-…
Apr 13, 2026
ec5fb1a
feat(search): add Tavily engine, extract fetcher, and dr_bench wiring
Apr 20, 2026
cb75af3
fix(jina): align reader with websearch (meta fetch + playwright fallb…
Apr 20, 2026
70eb30c
Merge remote-tracking branch 'upstream/main' into hanzhou/0413
Apr 21, 2026
406a9ef
feat(tools): harden Jina reader cascade and search integration
Apr 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ celerybeat-schedule
# Environments
projects/deep_research/.env
.venv
.venv_pac_curl/
.env
env/
venv/
Expand Down
75 changes: 57 additions & 18 deletions ms_agent/agent/llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def __init__(
self.tool_manager: Optional[ToolManager] = None
self.memory_tools: List[Memory] = []
self.rag: Optional[RAG] = None
self.knowledge_search: Optional[SirschmunkSearch] = None
self.knowledge_search: Optional[SirchmunkSearch] = None
self.llm: Optional[LLM] = None
self.runtime: Optional[Runtime] = None
self.max_chat_round: int = 0
Expand Down Expand Up @@ -582,16 +582,41 @@ def reasoning_output(self) -> str:
DictConfig({}))
return str(getattr(generation_config, 'reasoning_output', 'stdout'))

def _write_reasoning(self, text: str):
_THINKING_SEP = '─' * 40

def _reasoning_stream(self):
    """Pick the output stream for reasoning ("thinking") text.

    Returns ``sys.stdout`` when ``reasoning_output`` is ``'stdout'``
    (case-insensitive); any other value falls back to ``sys.stderr``.
    """
    use_stdout = self.reasoning_output.lower() == 'stdout'
    return sys.stdout if use_stdout else sys.stderr

def _write_reasoning(self, text: str, dim: bool = False):
    """Write reasoning text to the configured stream and flush it.

    No-op when *text* is empty.  When *dim* is requested, the ANSI
    "dim" escape is applied only if the target stream is an
    interactive TTY, so redirected output stays escape-free.
    """
    if not text:
        return
    out = self._reasoning_stream()
    is_tty = hasattr(out, 'isatty') and out.isatty()
    payload = f'\033[2m{text}\033[0m' if (dim and is_tty) else text
    out.write(payload)
    out.flush()

def _write_thinking_header(self):
    """Print a dim horizontal rule labelled "thinking" before reasoning text."""
    out = self._reasoning_stream()
    # 15 dashes + ' thinking ' (10 chars) + 15 dashes keeps the banner
    # the same 40-column width as the plain separator.
    banner = f'{self._THINKING_SEP[:15]} thinking {self._THINKING_SEP[25:]}'
    if hasattr(out, 'isatty') and out.isatty():
        out.write(f'\033[2m{banner}\033[0m\n')
    else:
        out.write(banner + '\n')
    out.flush()

def _write_thinking_footer(self):
    """Print a closing separator line after streamed reasoning output.

    Mirrors ``_write_thinking_header``: the rule is dimmed with ANSI
    escapes only when the target stream is an interactive TTY.

    Fix: the non-TTY branch previously contained stale duplicate
    statements that wrote an undefined ``text`` variable directly to
    ``sys.stderr``, raising ``NameError`` whenever the stream was not
    a TTY; those lines are removed.
    """
    stream = self._reasoning_stream()
    use_ansi = hasattr(stream, 'isatty') and stream.isatty()
    if use_ansi:
        stream.write(f'\n\033[2m{self._THINKING_SEP}\033[0m\n')
    else:
        stream.write(f'\n{self._THINKING_SEP}\n')
    stream.flush()

@property
def system(self):
Expand Down Expand Up @@ -875,13 +900,13 @@ async def step(
is_first = True
_response_message = None
_printed_reasoning_header = False
_printed_reasoning_footer = False
for _response_message in self.llm.generate(
messages, tools=tools):
if is_first:
messages.append(_response_message)
is_first = False

# Optional: stream model "thinking/reasoning" if available.
if self.show_reasoning:
reasoning_text = (
getattr(_response_message, 'reasoning_content', '')
Expand All @@ -892,19 +917,33 @@ async def step(
new_reasoning = reasoning_text[len(_reasoning):]
if new_reasoning:
if not _printed_reasoning_header:
self._write_reasoning('[thinking]:\n')
self._write_thinking_header()
_printed_reasoning_header = True
self._write_reasoning(new_reasoning)
self._write_reasoning(new_reasoning, dim=True)
_reasoning = reasoning_text

new_content = _response_message.content[len(_content):]
sys.stdout.write(new_content)
sys.stdout.flush()
if new_content:
if _printed_reasoning_header and not _printed_reasoning_footer:
self._write_thinking_footer()
_printed_reasoning_footer = True
sys.stdout.write(new_content)
sys.stdout.flush()
_content = _response_message.content
messages[-1] = _response_message
yield messages
if self.show_reasoning and _printed_reasoning_header:
self._write_reasoning('\n')
if _printed_reasoning_header and not _printed_reasoning_footer:
self._write_thinking_footer()

# Handle reasoning summaries that arrive after content
if self.show_reasoning and _response_message is not None:
final_reasoning = getattr(_response_message,
'reasoning_content', '') or ''
if final_reasoning and not _printed_reasoning_header:
self._write_thinking_header()
self._write_reasoning(final_reasoning, dim=True)
self._write_thinking_footer()

sys.stdout.write('\n')
else:
_response_message = self.llm.generate(messages, tools=tools)
Expand All @@ -913,9 +952,9 @@ async def step(
getattr(_response_message, 'reasoning_content', '')
or '')
if reasoning_text:
self._write_reasoning('[thinking]:\n')
self._write_reasoning(reasoning_text)
self._write_reasoning('\n')
self._write_thinking_header()
self._write_reasoning(reasoning_text, dim=True)
self._write_thinking_footer()
if _response_message.content:
self.log_output('[assistant]:')
self.log_output(_response_message.content)
Expand Down
170 changes: 158 additions & 12 deletions ms_agent/llm/anthropic_llm.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,120 @@
import inspect
from typing import Any, Dict, Generator, Iterator, List, Optional, Union

import httpx
import json
from ms_agent.llm import LLM
from ms_agent.llm.utils import Message, Tool, ToolCall
from ms_agent.utils import assert_package_exist, retry
from ms_agent.utils.constants import get_service_config
from omegaconf import DictConfig, OmegaConf


class _SSEEventInjector(httpx.SyncByteStream):
    """Injects SSE ``event:`` lines into DashScope's streaming response.

    DashScope only emits ``data:`` lines in its SSE stream. The Anthropic
    SDK's ``MessageStream`` relies on ``event:`` lines to route events.
    This wrapper extracts the ``type`` from the JSON payload and prepends
    the matching ``event:`` line so the SDK can process events correctly.
    """

    def __init__(self, stream):
        # Underlying httpx byte stream; raw bytes accumulate in the
        # buffer until a complete SSE block (blank-line terminated)
        # is available.
        self._stream = stream
        self._buffer = b''

    def __iter__(self):
        for chunk in self._stream:
            self._buffer += chunk
            # SSE blocks are separated by a blank line (b'\n\n').
            while b'\n\n' in self._buffer:
                block, self._buffer = self._buffer.split(b'\n\n', 1)
                if block.strip():
                    yield self._inject(block) + b'\n\n'
        # Flush any trailing partial block once the stream is exhausted.
        # Fix: reset the buffer afterwards so a re-iteration cannot
        # re-emit the same tail.
        tail, self._buffer = self._buffer, b''
        if tail.strip():
            yield self._inject(tail) + b'\n\n'

    @staticmethod
    def _inject(block: bytes) -> bytes:
        """Prefix *block* with an ``event:`` line derived from its payload.

        Returns the block unchanged when no ``data:`` line carries a JSON
        object with a non-empty ``type`` field.
        """
        for line in block.split(b'\n'):
            s = line.strip()
            if s.startswith(b'data:'):
                try:
                    payload = json.loads(s[5:].strip())
                except ValueError:
                    # json.JSONDecodeError is a ValueError subclass, so a
                    # single except covers both (the old duplicate listing
                    # was redundant).
                    continue
                # Fix: guard against non-dict JSON payloads, which would
                # previously raise an uncaught AttributeError on .get().
                if isinstance(payload, dict):
                    t = payload.get('type', '')
                    if t:
                        return b'event: ' + t.encode() + b'\n' + block
        return block

    def close(self):
        if hasattr(self._stream, 'close'):
            self._stream.close()


class DashScopeAnthropicTransport(httpx.BaseTransport):
    """Routes Anthropic SDK requests to DashScope's compatible-mode endpoint.

    DashScope returns Anthropic-format SSE responses for vertex AI Claude
    models (e.g. vertex_ai.claude-opus-4-6), but expects requests at
    /compatible-mode/v1/chat/completions with a native protocol flag rather
    than the standard Anthropic /v1/messages path. This transport
    transparently rewrites URL, auth headers, and body so the Anthropic
    SDK works unmodified.
    """

    def __init__(self,
                 dashscope_url: str,
                 api_key: str,
                 supplier: Optional[str] = None):
        # Target chat-completions URL, bearer key, and optional supplier
        # tag injected into dashscope_extend_params.
        self.dashscope_url = dashscope_url
        self.api_key = api_key
        self.supplier = supplier
        self._transport = httpx.HTTPTransport()

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        """Rewrite an Anthropic SDK request for DashScope and dispatch it."""
        payload = json.loads(request.content)
        wants_stream = bool(payload.get('stream'))

        # Ask DashScope to speak the native Anthropic protocol.
        extend = payload.setdefault('dashscope_extend_params', {})
        extend['using_native_protocol'] = True
        if self.supplier and 'supplier' not in extend:
            extend['supplier'] = self.supplier

        # Replace auth with a Bearer token; drop hop-by-hop/auth headers
        # and every Anthropic-specific header before forwarding the rest.
        headers = {
            'content-type': 'application/json',
            'authorization': f'Bearer {self.api_key}',
        }
        dropped = frozenset({
            'x-api-key', 'content-type', 'authorization', 'content-length',
            'host', 'transfer-encoding'
        })
        for name, value in request.headers.items():
            lowered = name.lower()
            if lowered in dropped or lowered.startswith('anthropic'):
                continue
            headers[name] = value

        rewritten = httpx.Request(
            method=request.method,
            url=self.dashscope_url,
            headers=headers,
            content=json.dumps(payload).encode('utf-8'),
            extensions=request.extensions,
        )
        upstream = self._transport.handle_request(rewritten)

        if not wants_stream:
            return upstream
        # Streaming: wrap the body so missing SSE ``event:`` lines are
        # injected for the Anthropic SDK's event router.
        return httpx.Response(
            status_code=upstream.status_code,
            headers=upstream.headers,
            stream=_SSEEventInjector(upstream.stream),
            extensions=upstream.extensions,
        )

    def close(self):
        self._transport.close()


class Anthropic(LLM):

def __init__(
Expand All @@ -29,10 +136,31 @@ def __init__(
if not api_key:
raise ValueError('Anthropic API key is required.')

self.client = anthropic.Anthropic(
api_key=api_key,
base_url=base_url,
)
self._is_dashscope = bool(base_url and 'dashscope' in base_url.lower())

if self._is_dashscope:
dashscope_url = base_url
if not dashscope_url.rstrip('/').endswith('/chat/completions'):
dashscope_url = dashscope_url.rstrip('/') + '/chat/completions'
supplier = config.llm.get('dashscope_supplier', None)
transport = DashScopeAnthropicTransport(
dashscope_url=dashscope_url,
api_key=api_key,
supplier=supplier,
)
http_client = httpx.Client(
transport=transport,
timeout=httpx.Timeout(300.0, connect=60.0),
)
self.client = anthropic.Anthropic(
api_key=api_key,
http_client=http_client,
)
else:
self.client = anthropic.Anthropic(
api_key=api_key,
base_url=base_url,
)

self.args: Dict = OmegaConf.to_container(
getattr(config, 'generation_config', DictConfig({})))
Expand Down Expand Up @@ -112,24 +240,42 @@ def _call_llm(self,
formatted_messages = formatted_messages[1:]

max_tokens = kwargs.pop('max_tokens', 16000)
extra_body = kwargs.get('extra_body', {})
enable_thinking = extra_body.get('enable_thinking', False)
thinking_budget = extra_body.get('thinking_budget', max_tokens)

enable_thinking = bool(kwargs.pop('enable_thinking', False))
thinking_budget = kwargs.pop('thinking_budget', None)
thinking_type = kwargs.pop('thinking_type', None)

raw_extra_body = kwargs.pop('extra_body', {}) or {}
extra_body = dict(raw_extra_body) if isinstance(raw_extra_body,
dict) else {}
enable_thinking = bool(
extra_body.pop('enable_thinking', enable_thinking))
thinking_budget = extra_body.pop('thinking_budget',
thinking_budget) or max_tokens
thinking_type = extra_body.pop('thinking_type', thinking_type)
for _k in ('show_reasoning', 'reasoning_output'):
extra_body.pop(_k, None)

params = {
'model': self.model,
'messages': formatted_messages,
'max_tokens': max_tokens,
'thinking': {
'type': 'enabled' if enable_thinking else 'disabled',
'budget_tokens': thinking_budget
}
'max_tokens': max_tokens
}

if thinking_type == 'adaptive':
params['thinking'] = {'type': 'adaptive'}
elif enable_thinking:
params['thinking'] = {
'type': 'enabled',
'budget_tokens': thinking_budget,
}

if system:
params['system'] = system
if tools:
params['tools'] = tools
if extra_body:
kwargs['extra_body'] = extra_body
params.update(kwargs)

if stream:
Expand Down
Loading
Loading