Skip to content

Commit db903c7

Browse files
committed
Merge remote-tracking branch 'origin/main' into main_ds_eval
2 parents e3ef66a + 2b147bd commit db903c7

File tree

5 files changed

+107
-65
lines changed

5 files changed

+107
-65
lines changed

alias/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,10 @@ alias_agent run --mode finance --task "Analyze Tesla's Q4 2024 financial perform
207207
# Data Science mode
208208
alias_agent run --mode ds \
209209
--task "Analyze the distribution of incidents across categories in 'incident_records.csv' to identify imbalances, inconsistencies, or anomalies, and determine their root cause." \
210-
--files ./docs/data/incident_records.csv
210+
--datasource ./docs/data/incident_records.csv
211211
```
212212

213-
**Note**: Files uploaded with `--files` are automatically copied to `/workspace` in the sandbox. Generated files are available in `sessions_mount_dir` subdirectories.
213+
**Note**: Files uploaded with `--datasource` are automatically copied to `/workspace` in the sandbox. Generated files are available in `sessions_mount_dir` subdirectories.
214214

215215
#### Enable Long-Term Memory Service (General Mode Only)
216216
To enable the long-term memory service in General mode, you need to:

alias/README_ZH.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,10 @@ alias_agent run --mode finance --task "Analyze Tesla's Q4 2024 financial perform
208208
# 数据科学(Data Science)模式
209209
alias_agent run --mode ds \
210210
--task "Analyze the distribution of incidents across categories in 'incident_records.csv' to identify imbalances, inconsistencies, or anomalies, and determine their root cause." \
211-
--files ./docs/data/incident_records.csv
211+
--datasource ./docs/data/incident_records.csv
212212
```
213213

214-
**注意**:使用 `--files` 上传的文件会自动复制到沙盒中的 `/workspace`。生成的文件可在 `sessions_mount_dir` 的子目录中找到。
214+
**注意**:使用 `--datasource` 上传的文件会自动复制到沙盒中的 `/workspace`。生成的文件可在 `sessions_mount_dir` 的子目录中找到。
215215

216216
#### 启用长期记忆服务(仅限通用模式)
217217
要在通用模式下启用长期记忆服务,您需要:

alias/src/alias/agent/agents/_data_science_agent.py

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -411,16 +411,13 @@ async def generate_response(
411411
report_md,
412412
report_html,
413413
) = await report_generator.generate_report()
414-
md_report_path = os.path.join(
415-
self.tmp_file_storage_dir,
416-
"detailed_report.md",
417-
)
418-
html_report_path = os.path.join(
419-
self.tmp_file_storage_dir,
420-
"detailed_report.html",
421-
)
422414

423-
if report_html:
415+
if report_md:
416+
md_report_path = os.path.join(
417+
self.tmp_file_storage_dir,
418+
"detailed_report.md",
419+
)
420+
424421
await self.toolkit.call_tool_function(
425422
ToolUseBlock(
426423
type="tool_use",
@@ -432,25 +429,35 @@ async def generate_response(
432429
},
433430
),
434431
)
435-
await self.toolkit.call_tool_function(
436-
ToolUseBlock(
437-
type="tool_use",
438-
id=str(uuid.uuid4()),
439-
name="write_file",
440-
input={
441-
"path": html_report_path,
442-
"content": report_html,
443-
},
444-
),
445-
)
446432
response = (
447433
f"{response}\n\n"
448434
"The detailed report (markdown version) has been saved to "
449-
f"{md_report_path}.\n"
450-
"The detailed report (html version) has been saved to "
451-
f"{html_report_path}."
435+
f"{md_report_path}."
452436
)
453437

438+
if report_html:
439+
html_report_path = os.path.join(
440+
self.tmp_file_storage_dir,
441+
"detailed_report.html",
442+
)
443+
444+
await self.toolkit.call_tool_function(
445+
ToolUseBlock(
446+
type="tool_use",
447+
id=str(uuid.uuid4()),
448+
name="write_file",
449+
input={
450+
"path": html_report_path,
451+
"content": report_html,
452+
},
453+
),
454+
)
455+
response = (
456+
f"{response}\n\n"
457+
"The detailed report (html version) has been saved to "
458+
f"{html_report_path}."
459+
)
460+
454461
kwargs["response"] = response
455462
structured_output = {}
456463

alias/src/alias/agent/agents/ds_agent_utils/built_in_prompt/_log_to_markdown_prompt.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,13 @@ Each task in the roadmap contains:
4444
- Brief Response
4545
- Detailed Report
4646
- You should choose the template that is most appropriate for the user task.
47-
- **Brief Response Template** should ONLY be used when the user asks for a simple data query task, where ONLY numeric or concise string values are returned, and complex analysis or research are not required.
48-
- **Detailed Report Template** should be used when the user asks for a detailed analysis of the data, where the analysis and research are required.
47+
- **Brief Response Template** should ONLY be used when the user asks for a
48+
simple, static data point (e.g., a total count or a specific value), where
49+
the answer is returned as a single numeric or concise string value with no
50+
analysis, transformation, comparison, or interpretation required.
51+
- **Detailed Report Template** should be used whenever the task involves
52+
distribution, discrepancy, imbalance, comparison, trend, root cause, or
53+
any form of analysis, interpretation, or evidence generation.
4954

5055
2. Data Source Constraints
5156
- **ONLY use information explicitly present in the log file**
@@ -103,7 +108,7 @@ You MUST ensure all captions, subtitles, and other contents in the report are wr
103108
- "brief_response": The brief response content.
104109
- When 'is_brief_response' is True, this field should be filled with the brief response content following the **Brief Response Template**.
105110
- When 'is_brief_response' is False, this field should be a concise summary of the detailed report in markdown format illustrating the key findings and insights.
106-
- "detailed_report_content": The detailed markdown report content following the **Detailed Report Template**. This field is ONLY generated when 'is_brief_response' is False, otherwise fulfill an empty string.
111+
- "report_content": The detailed markdown report content following the **Detailed Report Template**. This field is ONLY generated when 'is_brief_response' is False; otherwise, it must be an empty string.
107112
- You MUST ensure the JSON object is a valid JSON string and can be parsed by json.loads().
108113
- Double check all escapes are valid.
109114

alias/src/alias/agent/agents/ds_agent_utils/report_generation.py

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,62 @@
11
# -*- coding: utf-8 -*-
22
import os
3-
import json
43
import time
54
from typing import Tuple
65

76
import dotenv
7+
from pydantic import BaseModel, Field
8+
89
from agentscope.message import Msg
910

1011
from .utils import model_call_with_retry, get_prompt_from_file
11-
12-
1312
from .ds_config import PROMPT_DS_BASE_PATH
1413

1514
dotenv.load_dotenv()
1615

1716

17+
class ReportResponse(BaseModel):
18+
is_brief_response: bool = Field(
19+
...,
20+
description=(
21+
"True if the response is a brief response; "
22+
"False if it includes a detailed report."
23+
),
24+
)
25+
26+
brief_response: str = Field(
27+
...,
28+
description=(
29+
"The brief response content. "
30+
"When 'is_brief_response' is True, this field contains the full "
31+
"brief response following the Brief Response Template. "
32+
"When 'is_brief_response' is False, this field contains a concise "
33+
"markdown summary of the detailed report, highlighting key "
34+
"findings and insights."
35+
),
36+
json_schema_extra={
37+
"example": (
38+
"The analysis shows a 15% increase in user engagement "
39+
"after the feature update."
40+
),
41+
},
42+
)
43+
44+
report_content: str = Field(
45+
...,
46+
description=(
47+
"The detailed markdown report content following the "
48+
"Detailed Report Template. This field MUST be an empty "
49+
"string ('') when 'is_brief_response' is True. It MUST contain "
50+
"the full detailed report when 'is_brief_response' is False."
51+
),
52+
json_schema_extra={
53+
"example": "### User Task Description...\n"
54+
"### Associated Data Sources...\n"
55+
"### Research Conclusion...\n### Task1...### Task2...",
56+
},
57+
)
58+
59+
1860
class ReportGenerator:
1961
def __init__(self, model, formatter, memory_log: str):
2062
self.model = model
@@ -62,22 +104,13 @@ async def _log_to_markdown(self) -> str:
62104
self.formatter,
63105
msgs=msgs,
64106
msg_name="Report Generation",
107+
structured_model=ReportResponse,
65108
)
66109

67-
raw_response = res.content[0]["text"]
68-
69-
# TODO: More robust response cleaning
70-
if raw_response.strip().startswith("```json"):
71-
cleaned = raw_response.strip()[len("```json") :].lstrip("\n")
72-
if cleaned.endswith("```"):
73-
cleaned = cleaned[:-3].rstrip()
74-
response = cleaned
75-
else:
76-
response = raw_response.strip()
77110
end_time = time.time()
78-
# print(response)
79111
print(f"Log to markdown took {end_time - start_time} seconds")
80-
return response
112+
113+
return res.content[-1]["input"]
81114

82115
async def _convert_to_html(self, markdown_content: str) -> str:
83116
start_time = time.time()
@@ -103,37 +136,34 @@ async def _convert_to_html(self, markdown_content: str) -> str:
103136
return response.content[0]["text"]
104137

105138
async def generate_report(self) -> Tuple[str, str, str]:
106-
markdown_response = await self._log_to_markdown()
107-
108-
# responseFormat: {
109-
# "is_brief_response": True,
110-
# "brief_response": brief_response_content,
111-
# "report_content": detailed_report_content
112-
# }
113-
114-
try:
115-
markdown_content = json.loads(markdown_response)
116-
except json.JSONDecodeError as e:
117-
print(f"Error parsing JSON response: {e}")
118-
print(f"Response content: {markdown_response}")
119-
raise
139+
"""
140+
responseFormat: {
141+
"is_brief_response": True,
142+
"brief_response": brief_response_content,
143+
"report_content": detailed_report_content
144+
}
145+
"""
146+
markdown_content = await self._log_to_markdown()
120147

121148
if (
122149
str(markdown_content.get("is_brief_response", False)).lower()
123150
== "true"
124151
):
125152
# During brief response mode,
126153
# directly return the brief response to the user.
127-
return markdown_content["brief_response"], "", ""
154+
return markdown_content.get("brief_response", ""), "", ""
128155
else:
129156
# In detailed report mode,
130157
# convert the detailed report to HTML and return it to the user;
131158
# if a brief summary of the report is needed,
132159
# it can be obtained through markdown_content["brief_response"].
160+
html_content = ""
161+
if os.getenv("ENABLE_HTML_REPORT", "ON").lower() != "off":
162+
html_content = await self._convert_to_html(
163+
markdown_content.get("report_content", ""),
164+
)
133165
return (
134-
markdown_content["brief_response"],
135-
markdown_content["report_content"],
136-
await self._convert_to_html(
137-
markdown_content["report_content"],
138-
),
166+
markdown_content.get("brief_response", ""),
167+
markdown_content.get("report_content", ""),
168+
html_content,
139169
)

0 commit comments

Comments
 (0)