Skip to content

Commit 18e662d

Browse files
committed
wip
1 parent b38d838 commit 18e662d

9 files changed

Lines changed: 145 additions & 42 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from .base import Callback
2-
from .run_status import RunStatus
2+
from .runtime import Runtime
33
from .utils import callbacks_mapping

modelscope_agent/callbacks/base.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from omegaconf import DictConfig
44

5-
from .run_status import RunStatus
5+
from .runtime import Runtime
66
from ..llm.utils import Message
77

88

@@ -11,20 +11,20 @@ class Callback:
1111
def __init__(self, config: DictConfig):
1212
self.config = config
1313

14-
async def on_task_begin(self, run_status: RunStatus, messages: List[Message]):
14+
async def on_task_begin(self, runtime: Runtime, messages: List[Message]):
1515
pass
1616

17-
async def on_generate_response(self, run_status: RunStatus, messages: List[Message]):
17+
async def on_generate_response(self, runtime: Runtime, messages: List[Message]):
1818
pass
1919

20-
async def after_generate_response(self, run_status: RunStatus, messages: List[Message]):
20+
async def after_generate_response(self, runtime: Runtime, messages: List[Message]):
2121
pass
2222

23-
async def on_tool_call(self, run_status: RunStatus, messages: List[Message]):
23+
async def on_tool_call(self, runtime: Runtime, messages: List[Message]):
2424
pass
2525

26-
async def after_tool_call(self, run_status: RunStatus, messages: List[Message]):
26+
async def after_tool_call(self, runtime: Runtime, messages: List[Message]):
2727
pass
2828

29-
async def on_task_end(self, run_status: RunStatus, messages: List[Message]):
29+
async def on_task_end(self, runtime: Runtime, messages: List[Message]):
3030
pass
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from dataclasses import dataclass
2+
from typing import Optional
23

34
from modelscope_agent.llm.llm import LLM
45

56

67
@dataclass
7-
class RunStatus:
8+
class Runtime:
89

910
should_stop: bool = False
1011

11-
llm: LLM = None
12+
llm: LLM = None
13+
14+
tag: Optional[str] = None

modelscope_agent/cli/code/artifact_callback.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import os.path
2-
import re
32
from typing import List
43

54
from omegaconf import DictConfig
65

7-
from modelscope_agent.callbacks import Callback, RunStatus
6+
from modelscope_agent.callbacks import Callback, Runtime
87
from modelscope_agent.llm.llm import LLM
98
from modelscope_agent.llm.utils import Message
109
from modelscope_agent.tools.filesystem_tool import FileSystemTool
@@ -15,8 +14,9 @@ class ArtifactCallback(Callback):
1514
def __init__(self, config: DictConfig):
1615
super().__init__(config)
1716
self.file_system = FileSystemTool(config)
17+
self.code = False
1818

19-
async def on_task_begin(self, run_status: RunStatus, messages: List[Message]):
19+
async def on_task_begin(self, runtime: Runtime, messages: List[Message]):
2020
await self.file_system.connect()
2121

2222
@staticmethod
@@ -45,7 +45,7 @@ def extract_metadata(config: DictConfig, llm: LLM, messages: List[Message]):
4545
_response_message = llm.generate(_messages)
4646
return _response_message.content
4747

48-
async def after_generate_response(self, run_status: RunStatus, messages: List[Message]):
48+
async def after_generate_response(self, runtime: Runtime, messages: List[Message]):
4949
last_message_content = messages[-1].content
5050
if '</code>' in last_message_content:
5151
code = ''
@@ -64,21 +64,27 @@ async def after_generate_response(self, run_status: RunStatus, messages: List[Me
6464
elif recording:
6565
code += message.content
6666
if code:
67+
self.code = True
6768
try:
68-
code_file = self.extract_metadata(self.config, run_status.llm, messages)
69+
code_file = self.extract_metadata(self.config, runtime.llm, messages)
6970
await self.file_system.create_directory('output')
7071
await self.file_system.write_file(os.path.join('output', code_file), code)
7172
messages.append(Message(role='assistant', content=f'Original query: {messages[1].content}'
7273
f'Task sunning successfully, '
7374
f'the code has been saved in the {code_file} file.'))
7475
except Exception as e:
75-
print(f'Original query: {messages[1].content}. Task sunning failed with error {e} please consider retry generation.', flush=True)
76-
messages.append(Message(role='assistant', content=f'Original query: {messages[1].content}'
76+
raise RuntimeError(f'Original query: {messages[1].content}. Task sunning failed with error {e} please consider retry generation.', flush=True)
77+
messages.append(Message(role='user', content=f'Original query: {messages[1].content}'
7778
f'Task sunning failed with error {e} please consider retry generation.'))
7879
else:
79-
print(
80+
raise RuntimeError(
8081
f'Original query: {messages[1].content}. Task sunning failed, code format error, please consider retry generation.',
8182
flush=True)
82-
messages.append(Message(role='assistant', content=f'Original query: {messages[1].content}'
83+
messages.append(Message(role='user', content=f'Original query: {messages[1].content}'
8384
f'Task sunning failed, code format error, please consider retry generation.'))
84-
run_status.should_stop = True
85+
runtime.should_stop = True
86+
87+
async def on_task_end(self, runtime: Runtime, messages: List[Message]):
    """Fail loudly when a non-default workflow ends without any code.

    `self.code` is flipped to True by `after_generate_response` once code
    has been extracted from the model output. Runs tagged
    'Default workflow' are exempt from this check.

    Args:
        runtime: Shared runtime state; only `runtime.tag` is read here.
        messages: The conversation so far (unused).

    Raises:
        RuntimeError: If the run is not the default workflow and no code
            was produced during the task.
    """
    if runtime.tag != 'Default workflow' and not self.code:
        # Carry the tag in the message so the failing run can be identified
        # by whoever catches or logs the exception.
        raise RuntimeError(
            f'Task "{runtime.tag}" ended without producing any code.')

modelscope_agent/cli/code/coding.yaml

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,21 @@ generation_config:
99
temperature: 0.5
1010
top_k: 50
1111
stream: false
12+
max_completion_tokens: 65536
13+
max_tokens: 131072
1214
extra_body:
1315
enable_thinking: false
1416

1517
prompt:
1618
system: |
1719
You are a senior software architect. Your responsibility is to break down original requirements into implementable modules and assign tasks for each module into subtasks. The initiation of subtasks requires calling the `split_to_sub_task` tool, which can start all sub tasks as you need at one time. In this process, you need to answer the following questions:
1820
19-
1. What is the original requirement? Does it involve frontend or backend code? What programming language is needed?
21+
1. What is the original requirement? Does it involve frontend or backend code? What programming language is needed?
2022
2. How many modules should it be split into? What functions are needed for each module? How to combine each file?
2123
3. Due to code complexity, you need to inform your subtasks that code blocks need to be wrapped with <code></code> tags, as this code will subsequently be stored in local files.
2224
4. You should **MENTION CLEARLY** the detailed functions and interfaces your subtasks should follow, and relations between each module/file, in case they do duplicate works.
23-
5. Beauty and Functionality is the most important thing
24-
6. One task only writes one code file, all files should be put flatten in one folder, so no parent folder name should be given
25+
5. Beauty and functionality are the most important things. Never use invalid image links; use images from Unsplash-like websites
26+
6. One task only writes one code file, all files should be put in one folder, so no parent folder name should be given
2527
2628
An example:
2729
query: Please help me write an e-commerce website with Christmas atmosphere
@@ -31,8 +33,10 @@ prompt:
3133
1. An e-commerce website requires these modules: categories, goods, detail good, purchase, history orders, shopping cart, favourites, carousel images and so on.
3234
2. The website needs Christmas features, so the CSS style should be mainly red and white, decorated with Christmas images
3335
3. Due to the complexity of the code engineering, I cannot complete this complex goal in a single file, so I need to split the tasks
34-
4. One task should only writes one code file, all files should be put flatten in one folder, so I should not give parent folder prefix
36+
4. One task should only write one code file, all files should be put in one folder, so I should not give parent folder prefix
3537
5. I should give very detail designs of the pages functions(especially the functions interact with other code files), file import relations(This is the most important thing!!), in case the sub tasks work abnormally
38+
6. I should specify the programming language of all the subtasks
39+
7. I should tell the subtasks to use images from unsplash-like websites
3640
...
3741
3842
How many modules(subtasks) I should split?
@@ -71,19 +75,22 @@ prompt:
7175
{
7276
"system": "You are a software engineer which helps me to finish a part of my job. IMPORTANT: You should invoke other tasks' code files to finish the whole job. The code file you need to invoke will be given in the query.",
7377
"query": "The original query is to write an e-commerce website with Christmas atmosphere, your part of job is only one code file: the index.html page, you should follow instructions:
74-
1. Do not give fake image addresses, use links of Unsplash like website
78+
1. Do not give fake image addresses! use links of Unsplash like website
7579
2. You should make the page as beautiful as you can
7680
3. DO not add ``` around the code, wrap the code with <code></code>
77-
4. All files are flatten, always import other tasks' code file from the same directory
78-
5. Link to purchase.js to fulfill the purchase section
79-
6. Link to goods.js to fulfill the goods section
80-
7. Link to invoke cart.js to ..."
81+
4. All files are in one folder, always import other tasks' code file from the same directory!
82+
5. Link to purchase.js to fulfill the purchase section
83+
6. Link to goods.js to fulfill the goods section
84+
7. Link to invoke cart.js to ...
85+
8. Use javascript instead of node.js, do not use ES6 modules
86+
9. Never give dummy information!"
8187
},
8288
...
8389
]
8490
8591
callbacks:
8692
- artifact_callback
93+
- evaluator_callback
8794

8895
memory:
8996

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from typing import List
2+
3+
from omegaconf import DictConfig
4+
5+
from modelscope_agent.callbacks import Callback, Runtime
6+
from modelscope_agent.llm.utils import Message
7+
from modelscope_agent.utils import get_logger
8+
9+
logger = get_logger()
10+
11+
12+
class EvaluatorCallback(Callback):
    """Have a second LLM pass review the architect's plan before it runs.

    After the main model produces a plan, this callback sends the original
    requirement, the plan text and the first tool call's arguments to the
    same LLM under an "evaluator" system prompt. If the evaluator does not
    answer with `<OK>`, the pending tool calls are dropped and the
    evaluator's feedback is appended as a user message so the architect
    revises the plan on the next loop iteration.

    Attributes:
        argue_ended: True once the review is over (approved, round limit
            reached, nothing to review, or the run is not the default
            workflow).
        argue_round: Number of evaluator calls made so far.
    """

    _system = """You are a software architecture evaluator whose job is to assess whether software architectures created by other architects are reasonable. The actual workflow is:

1. An original requirement is given
2. A software architect provides the modules that need to be designed and breaks these modules down into different subtasks for completion, with each subtask responsible for writing one specific file
3. After the subtasks are completed, they are automatically saved to disk, and these modules will work together collaboratively

However, software architects have a high probability of making mistakes, including but not limited to:

1. Modules that don't meet user requirements, such as insufficient content richness. In this case, you can try prompting the software architect about whether there are other features that can be added, and you can also provide examples
2. Dependencies between subtasks must be clear. For example, if file1 in subtask1 needs to import and use file2 from subtask2 and file3 from subtask3, you need to carefully review whether the dependency plan is reasonable
3. Since files between subtasks work collaboratively, the interfaces between them must be reliable and clear. You need to check whether the interface design provided by the architect is sufficient to support collaborative work requirements
4. Subtasks may use different programming languages or different technology(we don't want to use es6 modules or node.js) or encounter other scenarios where they cannot work together collaboratively. You need to carefully point these out
5. The architect will call `split_task`to start all subtasks at one time, which needs a list of systems and queries. You need to check each subtask's arguments(system and query), whether the information is sufficient for collaborative work requirements.
6. Check whether the architect has mentioned all subtasks the generated files are in one folder, so when importing other files, no dir prefix should be given, and the resources(links, images) should be valid or from the unsplash-like websites, do not use local invalid images.
7. Your reply should be like `You should ...`, `Does you consider...`, or `Here is a problem which...`, at last you should say: `Now correct these problems and keep the good part and generate a new plan and call `split_task` again`
8. Some designs from the architect may be good, point out the good parts to encourage the architect to keep them!
9. **Do not be too strict!**, ignore trivial warnings. in case you and the architect cause a dead loop

Your specific job is:
Carefully analyze the errors within, prompt the software architect to make corrections, and when you feel the plan already meets the requirements, output the <OK> character, at which point the conversation will terminate.
Remember: You are not a software architect, you are an evaluator. You don't need to design architecture, you only need to point out or inspire awareness of the errors.
Now Begin:

"""

    # Upper bound on evaluator rounds; once reached the plan is accepted
    # as-is so the architect and the evaluator cannot loop forever.
    max_argue_rounds = 1

    def __init__(self, config: DictConfig):
        super().__init__(config)
        self.argue_ended = False
        self.argue_round = 0

    def _ask_evaluator(self, runtime: Runtime, query: str) -> str:
        """Send `query` to the LLM under the evaluator prompt.

        Returns:
            The evaluator's reply text, or '' when the model produced no
            message or no content.
        """
        _messages = [
            Message(role='system', content=self._system),
            Message(role='user', content=query),
        ]
        if getattr(self.config.generation_config, 'stream', False):
            # In streaming mode `generate` yields chunks that are folded
            # into one message.
            _response_message = None
            for chunk in runtime.llm.generate(_messages):
                _response_message = runtime.llm.merge_stream_message(
                    _response_message, chunk)
        else:
            _response_message = runtime.llm.generate(_messages)
        if _response_message is None:
            # An empty stream leaves nothing to merge.
            return ''
        return _response_message.content or ''

    async def after_generate_response(self, runtime: Runtime, messages: List[Message]):
        """Review the freshly generated plan and, if needed, request a redo.

        Args:
            runtime: Shared runtime state; `runtime.tag` and `runtime.llm`
                are read.
            messages: Conversation history, modified in place: it is
                trimmed to the first two messages plus the latest one, and
                evaluator feedback may be appended as a user message.
        """
        if runtime.tag != 'Default workflow':
            # Only the default workflow is reviewed; other tags skip it.
            self.argue_ended = True
            return

        if len(messages) > 3:
            # Keep the first two messages and the latest reply only, so
            # earlier rejected plans do not pile up in the context.
            messages[:] = messages[:2] + messages[-1:]

        if self.argue_round >= self.max_argue_rounds:
            self.argue_ended = True
            return

        if len(messages) < 3:
            # No requirement/plan pair to evaluate. End the review rather
            # than index past the end of the list (and so that
            # `after_tool_call` does not block stopping forever).
            logger.info('[Evaluator] Not enough messages to evaluate, skipped.')
            self.argue_ended = True
            return

        plan = messages[2]
        # The architect may reply without any tool call; show `None` to the
        # evaluator instead of crashing on an empty/None `tool_calls`.
        task_args = plan.tool_calls[0] if plan.tool_calls else None

        query = (f'The original requirement is: \n```text\n{messages[1].content}\n```\n\n '
                 f'The plan given by the architect is: \n```text\n{plan.content}\n```\n\n '
                 f'The task arguments is : \n```json\n{task_args}\n```\n\n')

        feedback = self._ask_evaluator(runtime, query)
        self.argue_round += 1

        # The reply may contain real newlines as well as literal "\n"
        # sequences; log every piece on its own line.
        for line in feedback.split('\n'):
            for _line in line.split('\\n'):
                logger.info(f'[Evaluator] {_line}')

        if not feedback or '<OK>' in feedback:
            # Approved, or the evaluator returned nothing usable: keep the
            # current plan instead of appending an empty user turn.
            self.argue_ended = True
        else:
            # Rejected: cancel the pending tool calls and hand the feedback
            # back to the architect as a user message.
            messages[-1].tool_calls = None
            messages.append(Message(role='user', content=feedback))

    async def after_tool_call(self, runtime: Runtime, messages: List[Message]):
        """Keep the run going while the review is still in progress.

        `should_stop` is only allowed to stay True once `argue_ended` is
        set; a rejected plan (whose tool calls were cleared) therefore
        triggers another generation round instead of ending the run.
        """
        runtime.should_stop = runtime.should_stop and self.argue_ended

modelscope_agent/engine/plan/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from pydantic import ConfigDict
55

6-
from modelscope_agent.callbacks import RunStatus
6+
from modelscope_agent.callbacks import Runtime
77
from modelscope_agent.llm.utils import Message
88

99

@@ -13,10 +13,10 @@ def __init__(self, config: ConfigDict):
1313
self.config = config
1414

1515
@abstractmethod
16-
def generate_plan(self, messages: List[Message], run_status: RunStatus):
16+
def generate_plan(self, messages: List[Message], runtime: Runtime):
1717
pass
1818

1919
@abstractmethod
20-
def update_plan(self, messages: List[Message], run_status: RunStatus):
20+
def update_plan(self, messages: List[Message], runtime: Runtime):
2121
pass
2222

modelscope_agent/engine/plan/observer_planer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from pydantic import ConfigDict
44

5-
from modelscope_agent.callbacks import RunStatus
5+
from modelscope_agent.callbacks import Runtime
66
from modelscope_agent.engine.plan.base import Planer
77
from modelscope_agent.llm.llm import LLM
88
from modelscope_agent.llm.utils import Message
@@ -15,8 +15,8 @@ def __init__(self, config: ConfigDict):
1515
observer_config = self.config.planer.observer
1616
self.observer = LLM.from_config(observer_config)
1717

18-
def generate_plan(self, messages: List[Message], run_status: RunStatus):
18+
def generate_plan(self, messages: List[Message], runtime: Runtime):
1919
pass
2020

21-
def update_plan(self, messages: List[Message], run_status: RunStatus):
21+
def update_plan(self, messages: List[Message], runtime: Runtime):
2222
pass

modelscope_agent/engine/simple_engine.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from omegaconf import DictConfig
88

99
from modelscope_agent.callbacks import Callback
10-
from modelscope_agent.callbacks import RunStatus
10+
from modelscope_agent.callbacks import Runtime
1111
from modelscope_agent.callbacks import callbacks_mapping
1212
from modelscope_agent.config import Config
1313
from modelscope_agent.engine.memory import memory_mapping
@@ -61,7 +61,7 @@ def __init__(self,
6161
self.config = Config.from_task(task_dir_or_id, env)
6262
self.llm = LLM.from_config(self.config)
6363
self.callbacks = []
64-
self.run_status = RunStatus(llm=self.llm)
64+
self.runtime = Runtime(llm=self.llm)
6565
self.trust_remote_code = kwargs.get('trust_remote_code', False)
6666
self.config.trust_remote_code = self.trust_remote_code
6767
self._register_callback_from_config()
@@ -97,7 +97,7 @@ def _register_callback_from_config(self):
9797

9898
async def _loop_callback(self, point, messages: List[Message]):
9999
for callback in self.callbacks:
100-
await getattr(callback, point)(self.run_status, messages)
100+
await getattr(callback, point)(self.runtime, messages)
101101

102102
async def _parallel_tool_call(self, messages: List[Message]):
103103
tool_call_result = await self.tool_manager.parallel_call_tool(messages[-1].tool_calls)
@@ -160,7 +160,7 @@ def _refine_memory(self, messages: List[Message]):
160160

161161
def _update_plan(self, messages: List[Message]):
162162
if self.planer:
163-
self.planer.update_plan(self.llm, messages, self.run_status)
163+
self.planer.update_plan(self.llm, messages, self.runtime)
164164
return messages
165165

166166
def handle_stream_message(self):
@@ -183,11 +183,12 @@ async def run(self, prompt, **kwargs):
183183
self._prepare_planer()
184184
self._prepare_rag()
185185
tag = kwargs.get('tag', 'Default workflow')
186+
self.runtime.tag = tag
186187
messages = self._prepare_messages(prompt)
187188
await self._loop_callback('on_task_begin', messages)
188189
if self.planer:
189-
self.planer.generate_plan(messages, self.run_status)
190-
while not self.run_status.should_stop:
190+
self.planer.generate_plan(messages, self.runtime)
191+
while not self.runtime.should_stop:
191192
await self._loop_callback('on_generate_response', messages)
192193
messages = self._refine_memory(messages)
193194
messages = self._update_plan(messages)
@@ -208,7 +209,7 @@ async def run(self, prompt, **kwargs):
208209
if _response_message.tool_calls:
209210
await self._parallel_tool_call(messages)
210211
else:
211-
self.run_status.should_stop = True
212+
self.runtime.should_stop = True
212213
await self._loop_callback('after_tool_call', messages)
213214
await self._loop_callback('on_task_end', messages)
214215
await self._cleanup_tools()

0 commit comments

Comments
 (0)