Skip to content

Commit 635c1be

Browse files
committed
style(data profile): format the LLMCallManager
1 parent 35d5a8f commit 635c1be

File tree

6 files changed

+77
-81
lines changed

6 files changed

+77
-81
lines changed

alias/src/alias/agent/agents/data_source/_data_profiler_factory.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
from loguru import logger
1010
import pandas as pd
1111
from sqlalchemy import inspect, text, create_engine
12+
from agentscope.message import Msg
13+
1214
from alias.agent.agents.data_source._typing import SourceType
1315
from alias.agent.agents.ds_agent_utils import (
1416
get_prompt_from_file,
1517
)
16-
from alias.agent.utils.unified_model_call_interface import (
17-
UnifiedModelCallInterface,
18+
from alias.agent.utils.llm_call_manager import (
19+
LLMCallManager,
1820
)
1921

2022

@@ -32,7 +34,7 @@ def __init__(
3234
self,
3335
path: str,
3436
source_type: SourceType,
35-
model_interface: UnifiedModelCallInterface,
37+
llm_call_manager: LLMCallManager,
3638
):
3739
"""Initialize the data profiler with API key, data path and type.
3840
@@ -44,7 +46,7 @@ def __init__(
4446
self.path = path
4547
self.file_name = os.path.basename(path)
4648
self.source_type = source_type
47-
self.model_interface = model_interface
49+
self.llm_call_manager = llm_call_manager
4850

4951
self.source_types_2_prompts = {
5052
SourceType.CSV: "_profile_csv_prompt.md",
@@ -57,8 +59,8 @@ def __init__(
5759
raise ValueError(f"Unsupported source type: {source_type}")
5860
self.prompt = self._load_prompt(source_type)
5961

60-
base_model_name = self.model_interface.get_base_model_name()
61-
vl_model_name = self.model_interface.get_vl_model_name()
62+
base_model_name = self.llm_call_manager.get_base_model_name()
63+
vl_model_name = self.llm_call_manager.get_vl_model_name()
6264

6365
self.source_types_2_models = {
6466
SourceType.CSV: base_model_name,
@@ -159,9 +161,14 @@ async def _call_model(
159161
self,
160162
content: Any,
161163
) -> Dict[str, Any]:
162-
response = await self.model_interface.unified_model_call_interface(
164+
sys_prompt = "You are a helpful AI assistant for database management."
165+
msgs = [
166+
Msg("system", sys_prompt, "system"),
167+
Msg("user", content, "user"),
168+
]
169+
response = await self.llm_call_manager(
163170
model_name=self.model_name,
164-
user_content=content,
171+
messages=msgs,
165172
)
166173
response = BaseDataProfiler.tool_clean_json(response)
167174
return response
@@ -654,7 +661,7 @@ class DataProfilerFactory:
654661

655662
@staticmethod
656663
def get_profiler(
657-
model_interface: UnifiedModelCallInterface,
664+
llm_call_manager: LLMCallManager,
658665
path: str,
659666
source_type: SourceType,
660667
) -> BaseDataProfiler:
@@ -676,25 +683,25 @@ def get_profiler(
676683
return ImageProfiler(
677684
path=path,
678685
source_type=source_type,
679-
model_interface=model_interface,
686+
llm_call_manager=llm_call_manager,
680687
)
681688
elif source_type == SourceType.CSV:
682689
return CsvProfiler(
683690
path=path,
684691
source_type=source_type,
685-
model_interface=model_interface,
692+
llm_call_manager=llm_call_manager,
686693
)
687694
elif source_type == SourceType.EXCEL:
688695
return ExcelProfiler(
689696
path=path,
690697
source_type=source_type,
691-
model_interface=model_interface,
698+
llm_call_manager=llm_call_manager,
692699
)
693700
elif source_type == SourceType.RELATIONAL_DB:
694701
return RelationalDatabaseProfiler(
695702
path=path,
696703
source_type=source_type,
697-
model_interface=model_interface,
704+
llm_call_manager=llm_call_manager,
698705
)
699706
else:
700707
raise ValueError(f"Unsupported source type: {source_type}")

alias/src/alias/agent/agents/data_source/data_profile.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
get_workspace_file,
1616
)
1717
from alias.runtime.alias_sandbox.alias_sandbox import AliasSandbox
18-
from alias.agent.utils.unified_model_call_interface import (
19-
UnifiedModelCallInterface,
18+
from alias.agent.utils.llm_call_manager import (
19+
LLMCallManager,
2020
)
2121

2222

@@ -74,7 +74,7 @@ async def data_profile(
7474
sandbox: AliasSandbox,
7575
sandbox_path: str,
7676
source_type: SourceType,
77-
model_interface: UnifiedModelCallInterface,
77+
llm_call_manager: LLMCallManager,
7878
) -> Dict[str, Any]:
7979
"""
8080
Generates a detailed profile and summary for data source using LLMs.
@@ -104,7 +104,7 @@ async def data_profile(
104104
raise ValueError(f"Unsupported source type {source_type}")
105105

106106
profiler = DataProfilerFactory.get_profiler(
107-
model_interface=model_interface,
107+
llm_call_manager=llm_call_manager,
108108
path=local_path,
109109
source_type=source_type,
110110
)

alias/src/alias/agent/agents/data_source/data_source.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
from alias.agent.tools.sandbox_util import (
2525
copy_local_file_to_workspace,
2626
)
27-
from alias.agent.utils.unified_model_call_interface import (
28-
UnifiedModelCallInterface,
27+
from alias.agent.utils.llm_call_manager import (
28+
LLMCallManager,
2929
)
3030

3131

@@ -186,16 +186,16 @@ def get_coarse_desc(self):
186186
async def prepare_profile(
187187
self,
188188
sandbox: Sandbox,
189-
model_interface: UnifiedModelCallInterface,
189+
llm_call_manager: LLMCallManager,
190190
) -> Optional[Dict[str, Any]]:
191191
"""Run type-specific profiling."""
192-
if model_interface and not self.profile:
192+
if llm_call_manager and not self.profile:
193193
try:
194194
self.profile = await data_profile(
195195
sandbox=sandbox,
196196
sandbox_path=self.source_access,
197197
source_type=self.source_type,
198-
model_interface=model_interface,
198+
llm_call_manager=llm_call_manager,
199199
)
200200
logger.info(
201201
"Profiling successfully: "
@@ -252,7 +252,7 @@ class DataSourceManager:
252252
def __init__(
253253
self,
254254
sandbox: Sandbox,
255-
model_interface: UnifiedModelCallInterface,
255+
llm_call_manager: LLMCallManager,
256256
):
257257
"""Initialize an empty data source manager."""
258258
self._data_sources: Dict[str, DataSource] = {}
@@ -265,7 +265,7 @@ def __init__(
265265

266266
self.toolkit = AliasToolkit(sandbox=sandbox)
267267

268-
self.model_interface = model_interface
268+
self.llm_call_manager = llm_call_manager
269269

270270
def add_data_source(
271271
self,
@@ -338,7 +338,7 @@ async def prepare_data_sources(self) -> None:
338338
await data_source.prepare(self.toolkit)
339339
await data_source.prepare_profile(
340340
self.toolkit.sandbox,
341-
self.model_interface,
341+
self.llm_call_manager,
342342
)
343343

344344
def _generate_name(self, endpoint: str) -> str:

alias/src/alias/agent/run.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@
4343
init_ds_toolkit,
4444
)
4545

46-
from alias.agent.utils.unified_model_call_interface import (
47-
UnifiedModelCallInterface,
46+
from alias.agent.utils.llm_call_manager import (
47+
LLMCallManager,
4848
)
4949

5050
MODEL_FORMATTER_MAPPING = {
@@ -116,15 +116,15 @@ async def arun_meta_planner(
116116
ds_toolkit = init_ds_toolkit(worker_full_toolkit)
117117

118118
# Initialize data source manager
119-
model_interface = UnifiedModelCallInterface(
119+
llm_call_manager = LLMCallManager(
120120
base_model_name=MODEL_CONFIG_NAME,
121121
vl_model_name=VL_MODEL_NAME,
122122
model_formatter_mapping=MODEL_FORMATTER_MAPPING,
123123
)
124124
data_manager = await prepare_data_sources(
125125
session_service=session_service,
126126
sandbox=sandbox,
127-
model_interface=model_interface,
127+
llm_call_manager=llm_call_manager,
128128
)
129129
add_data_source_tools(
130130
data_manager,
@@ -362,7 +362,7 @@ async def arun_datascience_agent(
362362

363363
global_toolkit = AliasToolkit(sandbox, add_all=True)
364364
worker_toolkit = init_ds_toolkit(global_toolkit)
365-
model_interface = UnifiedModelCallInterface(
365+
llm_call_manager = LLMCallManager(
366366
base_model_name=MODEL_CONFIG_NAME,
367367
vl_model_name=VL_MODEL_NAME,
368368
model_formatter_mapping=MODEL_FORMATTER_MAPPING,
@@ -371,7 +371,7 @@ async def arun_datascience_agent(
371371
session_service=session_service,
372372
sandbox=sandbox,
373373
binded_toolkit=worker_toolkit,
374-
model_interface=model_interface,
374+
llm_call_manager=llm_call_manager,
375375
)
376376

377377
try:
Lines changed: 33 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# -*- coding: utf-8 -*-
22
import asyncio
3-
from typing import Any, Dict, Literal
3+
from typing import Any, Dict, Literal, Type, AsyncGenerator
44
from agentscope.message import Msg
5-
from agentscope.model import DashScopeChatModel
6-
from agentscope.formatter import DashScopeChatFormatter
5+
from agentscope.model import DashScopeChatModel, ChatResponse
76

87
from tenacity import retry, stop_after_attempt, wait_fixed
8+
from pydantic import BaseModel
99

1010

1111
@retry(
@@ -14,28 +14,21 @@
1414
reraise=True,
1515
# before_sleep=_print_exc_on_retry
1616
)
17-
async def _model_call_with_retry(
17+
async def model_call_with_retry(
1818
model: DashScopeChatModel = None,
19-
formatter: DashScopeChatFormatter = None,
20-
sys_content: Any = None,
21-
user_content: Any = None,
19+
messages: list[dict[str, Any]] = None,
2220
tool_json_schemas: list[dict] | None = None,
2321
tool_choice: Literal["auto", "none", "required"] | str | None = None,
22+
structured_model: Type[BaseModel] | None = None,
2423
msg_name: str = "model_call",
25-
structured_model=None,
24+
**kwargs: Any,
2625
) -> Msg:
27-
msgs = [
28-
Msg("system", sys_content, "system"),
29-
Msg("user", user_content, "user"),
30-
]
31-
32-
format_msgs = await formatter.format(msgs=msgs)
33-
3426
res = await model(
35-
format_msgs,
27+
messages,
3628
tools=tool_json_schemas,
3729
tool_choice=tool_choice,
3830
structured_model=structured_model,
31+
kwargs=kwargs,
3932
)
4033

4134
if model.stream:
@@ -52,7 +45,7 @@ async def _model_call_with_retry(
5245
return msg
5346

5447

55-
class UnifiedModelCallInterface:
48+
class LLMCallManager:
5649
def __init__(
5750
self,
5851
base_model_name: str,
@@ -63,35 +56,31 @@ def __init__(
6356
self.vl_model_name = vl_model_name
6457
self.model_formatter_mapping = model_formatter_mapping
6558

66-
async def unified_model_call_interface(
67-
self,
68-
model_name: str = None,
69-
user_content: Any = None,
70-
sys_content: Any = None,
71-
) -> Msg:
72-
model, formatter = self._load_model_and_formatter(
73-
model_name=model_name,
74-
)
75-
if sys_content is None:
76-
sys_content = (
77-
"You are a helpful AI assistant for database management."
78-
)
79-
80-
raw_response = await _model_call_with_retry(
81-
model=model,
82-
formatter=formatter,
83-
sys_content=sys_content,
84-
user_content=user_content,
85-
)
86-
response = raw_response.content[0]["text"]
87-
return response
88-
89-
def _load_model_and_formatter(self, model_name: str):
90-
model, formatter = self.model_formatter_mapping[model_name]
91-
return model, formatter
92-
9359
def get_base_model_name(self) -> str:
    """Return the configured name of the base (text) chat model."""
    return self.base_model_name
9561

9662
def get_vl_model_name(self) -> str:
    """Return the configured name of the vision-language (VL) model."""
    return self.vl_model_name
64+
65+
async def __call__(
    self,
    model_name: str,
    messages: list[dict[str, Any]],
    tools: list[dict] | None = None,
    tool_choice: Literal["auto", "none", "required"] | str | None = None,
    structured_model: Type[BaseModel] | None = None,
    **kwargs: Any,
) -> str:
    """Format ``messages`` for the named model, call it with retry, and
    return the text of the first content block of the response.

    Args:
        model_name: Key into ``self.model_formatter_mapping`` selecting
            the (model, formatter) pair to use.
        messages: Chat messages to send (``Msg`` objects, per callers —
            the declared ``list[dict]`` type looks stale; TODO confirm).
        tools: Optional JSON schemas of tools the model may invoke.
        tool_choice: Tool-choice policy forwarded to the model.
        structured_model: Optional pydantic model for structured output.
        **kwargs: Extra keyword arguments forwarded to the model call.

    Returns:
        The ``"text"`` field of the first content item of the model's
        response.  (Return annotation fixed: the original declared
        ``ChatResponse | AsyncGenerator[...]`` but a ``str`` is returned.)

    Raises:
        KeyError: If ``model_name`` is not in ``model_formatter_mapping``.
    """
    model, formatter = self.model_formatter_mapping[model_name]
    format_msgs = await formatter.format(msgs=messages)
    raw_response = await model_call_with_retry(
        model=model,
        messages=format_msgs,
        tool_json_schemas=tools,
        tool_choice=tool_choice,
        structured_model=structured_model,
        msg_name="model_call",
        # BUG FIX: extras must be unpacked; the original ``kwargs=kwargs``
        # sent a single keyword argument literally named "kwargs" instead
        # of forwarding the caller's options.
        **kwargs,
    )
    # raw_response is a Msg; extract the text of its first content item.
    response = raw_response.content[0]["text"]
    return response

alias/src/alias/agent/utils/prepare_data_source.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
from alias.agent.agents.data_source.data_source import DataSourceManager
77
from alias.agent.tools import AliasToolkit, share_tools
8-
from alias.agent.utils.unified_model_call_interface import (
9-
UnifiedModelCallInterface,
8+
from alias.agent.utils.llm_call_manager import (
9+
LLMCallManager,
1010
)
1111

1212
if os.getenv("TEST_MODE") not in ["local", "runtime-test"]:
@@ -21,12 +21,12 @@ async def prepare_data_sources(
2121
session_service: SessionService,
2222
sandbox: Sandbox,
2323
binded_toolkit: AliasToolkit = None,
24-
model_interface: UnifiedModelCallInterface = None,
24+
llm_call_manager: LLMCallManager = None,
2525
):
2626
data_manager = await build_data_manager(
2727
session_service,
2828
sandbox,
29-
model_interface,
29+
llm_call_manager,
3030
)
3131
if len(data_manager):
3232
await add_user_data_message(session_service, data_manager)
@@ -40,9 +40,9 @@ async def prepare_data_sources(
4040
async def build_data_manager(
4141
session_service: SessionService,
4242
sandbox: Sandbox,
43-
model_interface: UnifiedModelCallInterface,
43+
llm_call_manager: LLMCallManager,
4444
):
45-
data_manager = DataSourceManager(sandbox, model_interface)
45+
data_manager = DataSourceManager(sandbox, llm_call_manager)
4646
if (
4747
hasattr(session_service.session_entity, "data_config")
4848
and session_service.session_entity.data_config

0 commit comments

Comments
 (0)