diff --git a/.gitignore b/.gitignore index 6900f8f..61f5df3 100644 --- a/.gitignore +++ b/.gitignore @@ -170,4 +170,8 @@ _build/ logs/ -.DS_Store \ No newline at end of file +.DS_Store + +# RAG data +local_data/ +vim_docs/ \ No newline at end of file diff --git a/crab-benchmark-v0/dataset/ubuntu/camel-task-example.json b/crab-benchmark-v0/dataset/ubuntu/camel-task-example.json new file mode 100644 index 0000000..a0efb01 --- /dev/null +++ b/crab-benchmark-v0/dataset/ubuntu/camel-task-example.json @@ -0,0 +1,15 @@ +{ + "description": "Using Firefox, navigate to the CAMEL-AI GitHub repository (https://github.com/camel-ai/camel). Download the example code file 'examples/role_playing.py', then use Visual Studio Code to open it and save a copy to '/home/crab/camel_examples/role_playing.py'.", + "tasks": [ + { + "task": "a313ea4d-e501-4971-b4fe-db2aad19eac1", + "attribute": { + "url": "https://raw.githubusercontent.com/camel-ai/camel/master/examples/role_playing.py", + "file_path": "/home/crab/camel_examples/role_playing.py" + }, + "output": "/home/crab/camel_examples/role_playing.py" + } + ], + "adjlist": "0", + "id": "camel-example-001" +} diff --git a/crab/agents/backend_models/__init__.py b/crab/agents/backend_models/__init__.py index 5f36882..9175cf1 100644 --- a/crab/agents/backend_models/__init__.py +++ b/crab/agents/backend_models/__init__.py @@ -13,6 +13,7 @@ # =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. =========== # ruff: noqa: F401 from .camel_model import CamelModel +from .camel_rag_model import CamelRAGModel from .claude_model import ClaudeModel from .gemini_model import GeminiModel from .openai_model import OpenAIModel diff --git a/crab/agents/backend_models/camel_rag_model.py b/crab/agents/backend_models/camel_rag_model.py new file mode 100644 index 0000000..252473c --- /dev/null +++ b/crab/agents/backend_models/camel_rag_model.py @@ -0,0 +1,128 @@ +# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. 
import logging
from typing import Any, List, Optional, Tuple

from crab import BackendOutput, MessageType
from crab.agents.backend_models.camel_model import CamelModel
from camel.messages import BaseMessage
from langchain.schema import Document

try:
    from camel.embeddings import OpenAIEmbedding
    from camel.retrievers import VectorRetriever
    from camel.storages import QdrantStorage

    RAG_ENABLED = True
except ImportError:
    RAG_ENABLED = False

logger = logging.getLogger(__name__)


class CamelRAGModel(CamelModel):
    """CAMEL backend that prepends retrieved document snippets to each chat.

    Documents are indexed with :meth:`process_documents`. On every
    :meth:`chat` call the first non-image message is used as the retrieval
    query and any hits are injected as an extra leading text message.
    """

    def __init__(
        self,
        model: str,
        model_platform: str,
        parameters: dict[str, Any] | None = None,
        history_messages_len: int = 0,
        embedding_model: Optional[str] = "text-embedding-3-small",
        collection_name: str = "knowledge_base",
        vector_storage_path: str = "local_data",
        top_k: int = 3,
        similarity_threshold: float = 0.75,
    ) -> None:
        """Set up the chat model and, optionally, the retrieval components.

        Args:
            model: Model name forwarded to ``CamelModel``.
            model_platform: Model platform forwarded to ``CamelModel``.
            parameters: Model parameters forwarded to ``CamelModel``.
            history_messages_len: History length forwarded to ``CamelModel``.
            embedding_model: A falsy value disables retrieval entirely.
            collection_name: Name of the Qdrant collection.
            vector_storage_path: Directory of the local Qdrant storage.
            top_k: Maximum number of snippets requested per query.
            similarity_threshold: Minimum similarity requested per query.

        Raises:
            ImportError: If the optional CAMEL RAG dependencies are missing.
        """
        if not RAG_ENABLED:
            raise ImportError(
                "Please install RAG dependencies: "
                "pip install camel-ai[embeddings,retrievers,storages]"
            )

        super().__init__(model, model_platform, parameters, history_messages_len)

        # NOTE(review): the name given in `embedding_model` only switches
        # retrieval on or off; the embedding itself is always the
        # `OpenAIEmbedding()` default. Confirm whether the name should be
        # forwarded to the embedding class.
        self.embedding_model = OpenAIEmbedding() if embedding_model else None

        if self.embedding_model is not None:
            self.vector_storage = QdrantStorage(
                vector_dim=self.embedding_model.get_output_dim(),
                path=vector_storage_path,
                collection_name=collection_name,
            )
            # Attach the storage to the retriever so that `process()` and
            # `query()` operate on the same collection. `query()` is called
            # without a storage argument, so it can only see a storage that
            # was bound here.
            # NOTE(review): assumes the CAMEL `VectorRetriever` constructor
            # accepts `storage=` — confirm against the pinned camel-ai version.
            self.retriever = VectorRetriever(
                embedding_model=self.embedding_model,
                storage=self.vector_storage,
            )
        else:
            self.vector_storage = None
            self.retriever = None

        self.top_k = top_k
        self.similarity_threshold = similarity_threshold

    def process_documents(self, content_path: str) -> None:
        """Index the document at ``content_path`` into the vector storage.

        Raises:
            ValueError: If retrieval was disabled at construction time.
        """
        if self.retriever is None or self.vector_storage is None:
            raise ValueError("RAG components not initialized")

        self.retriever.process(content=content_path)

    def _retrieve(self, query: str) -> List[dict[str, Any]]:
        """Run one retrieval query; return ``[]`` when disabled or on failure.

        Retrieval is best-effort: a failure must not break the chat, but it is
        logged so that a misconfigured retriever does not go unnoticed.
        """
        if not query or self.retriever is None or self.vector_storage is None:
            return []

        try:
            results = self.retriever.query(
                query=query,
                top_k=self.top_k,
                similarity_threshold=self.similarity_threshold,
            )
        except Exception:
            logger.exception("RAG retrieval failed; continuing without context")
            return []
        return results or []

    def _enhance_with_context(
        self, messages: List[Tuple[str, MessageType]]
    ) -> List[Tuple[str, MessageType]]:
        """Return ``messages`` with a leading context message when hits exist.

        The first non-image message is used as the query. The input list is
        returned unchanged when nothing usable is retrieved.
        """
        query = next(
            (text for text, kind in messages if kind != MessageType.IMAGE_JPG_BASE64),
            "",
        )

        # Keep only entries that carry both a source path and text. Entries
        # without a source path (e.g. a "nothing found" placeholder) would
        # only add noise to the prompt.
        hits = [
            info
            for info in self._retrieve(query)
            if info.get("content path") and info.get("text")
        ]
        if not hits:
            return messages

        context = "Relevant context:\n\n" + "".join(
            f"From {info['content path']}:\n{info['text']}\n\n" for info in hits
        )
        return [(context, MessageType.TEXT), *messages]

    def chat(self, messages: List[Tuple[str, MessageType]]) -> BackendOutput:
        """Chat with the model after injecting retrieved context."""
        enhanced_messages = self._enhance_with_context(messages)
        return super().chat(enhanced_messages)

    def get_relevant_content(self, query: str) -> List[dict[str, Any]]:
        """Return the raw retriever results for ``query``.

        Returns an empty list when retrieval is disabled, the query is empty,
        or the retriever raises.
        """
        return self._retrieve(query)
from termcolor import colored
import os

from crab import Benchmark, create_benchmark
from crab.agents.backend_models.camel_model import CamelModel
from crab.agents.policies import SingleAgentPolicy
from crab.benchmarks.template import template_benchmark_config
from camel.types import ModelType, ModelPlatformType
from camel.models import ModelFactory


def start_benchmark(
    benchmark: Benchmark, agent: SingleAgentPolicy, max_steps: int = 20
) -> None:
    """Drive ``agent`` against ``benchmark`` until the task terminates.

    Each step observes ``template_env``, asks the agent for actions and
    executes them in order. The function returns as soon as a step reports
    ``terminated``, or after ``max_steps`` agent steps.

    Args:
        benchmark: Benchmark with a task already started.
        agent: Agent policy already reset for that task.
        max_steps: Upper bound on the number of agent steps.
    """
    for step in range(max_steps):
        print("=" * 40)
        print(f"Start agent step {step}:")
        observation = benchmark.observe()["template_env"]
        print(f"Current environment observation: {observation}")
        actions = agent.chat(
            {
                "template_env": [
                    # NOTE(review): 0 is presumably the text message type —
                    # confirm against crab's MessageType.
                    (f"Current environment observation: {observation}", 0),
                ]
            }
        )
        print(colored(f"Agent take action: {actions}", "blue"))

        for action in actions:
            # A separate name keeps the step result from shadowing the list
            # of actions that is being iterated.
            step_result = benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            print(
                colored(
                    f'Action "{action.name}" success, stat: '
                    f"{step_result.evaluation_results}",
                    "green",
                )
            )
            if step_result.terminated:
                print("=" * 40)
                print(
                    colored(
                        f"Task finished, result: {step_result.evaluation_results}",
                        "green",
                    )
                )
                return


def main() -> None:
    """Run task "0" of the template benchmark with a CAMEL-backed agent."""
    benchmark = create_benchmark(template_benchmark_config)
    try:
        # TODO: Use new task config
        task, action_space = benchmark.start_task("0")
        env_descriptions = benchmark.get_env_descriptions()

        # TODO: Use local model
        camel_model = CamelModel(
            model="gpt-4o",
            model_platform=ModelPlatformType.OPENAI,
            parameters={"temperature": 0.7},
        )
        agent = SingleAgentPolicy(model_backend=camel_model)
        agent.reset(task.description, action_space, env_descriptions)
        print("Start performing task: " + colored(f'"{task.description}"', "green"))
        start_benchmark(benchmark, agent)
    finally:
        # Reset even when the run fails so the benchmark is not left
        # mid-task.
        benchmark.reset()


if __name__ == "__main__":
    main()
from termcolor import colored
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urldefrag, urljoin, urlsplit

from crab import Benchmark, create_benchmark
from crab.agents.backend_models.camel_rag_model import CamelRAGModel
from crab.agents.policies import SingleAgentPolicy
from crab.benchmarks.template import template_benchmark_config
from camel.types import ModelType, ModelPlatformType

VIM_DOCS_URL = "https://vimdoc.sourceforge.net/htmldoc/usr_07.html"
VIM_DOCS_DIR = "vim_docs"
REQUEST_TIMEOUT = 30.0


def _print_rag_content(agent: SingleAgentPolicy, observation) -> None:
    """Print the documents the RAG backend retrieves for ``observation``.

    Purely informational: any failure is printed and otherwise ignored so
    that the benchmark loop keeps running.
    """
    try:
        rag_content = agent.model_backend.get_relevant_content(str(observation))
        print(colored("\nRelevant RAG content:", "magenta"))
        if not rag_content:
            print(colored("No relevant content found", "yellow"))
            return
        for idx, content in enumerate(rag_content, 1):
            print(colored(f"\nDocument {idx}:", "magenta"))
            if isinstance(content, dict):
                print(
                    colored(
                        f"Source: {content.get('content path', 'Unknown')}",
                        "yellow",
                    )
                )
                print(colored(f"Content: {content.get('text', '')[:500]}...", "white"))
            else:
                print(colored(f"Content: {str(content)[:500]}...", "white"))
    except Exception as e:
        print(colored(f"Error retrieving RAG content: {str(e)}", "red"))


# TODO: Add new benchmark template
def start_benchmark(
    benchmark: Benchmark, agent: SingleAgentPolicy, max_steps: int = 20
) -> None:
    """Drive ``agent`` against ``benchmark``, showing retrieved context.

    Each step observes ``template_env``, prints what the RAG backend
    retrieves for that observation, asks the agent for actions and executes
    them in order. Returns as soon as a step reports ``terminated``, or
    after ``max_steps`` agent steps.
    """
    for step in range(max_steps):
        print("=" * 40)
        print(f"Start agent step {step}:")
        observation = benchmark.observe()["template_env"]
        print(f"Current environment observation: {observation}")

        _print_rag_content(agent, observation)

        actions = agent.chat(
            {
                "template_env": [
                    # NOTE(review): 0 is presumably the text message type —
                    # confirm against crab's MessageType.
                    (f"Current environment observation: {observation}", 0),
                ]
            }
        )
        print(colored(f"\nAgent take action: {actions}", "blue"))

        for action in actions:
            # A separate name keeps the step result from shadowing the list
            # of actions that is being iterated.
            step_result = benchmark.step(
                action=action.name,
                parameters=action.arguments,
                env_name=action.env,
            )
            print(
                colored(
                    f'Action "{action.name}" success, stat: '
                    f"{step_result.evaluation_results}",
                    "green",
                )
            )
            if step_result.terminated:
                print("=" * 40)
                print(
                    colored(
                        f"Task finished, result: {step_result.evaluation_results}",
                        "green",
                    )
                )
                return


def _fetch_soup(url: str, timeout: float) -> BeautifulSoup:
    """Download ``url`` and parse it; raise on network or HTTP errors."""
    response = requests.get(url, timeout=timeout)
    # Without this check an HTTP error page would be saved as documentation.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def _soup_to_text(soup: BeautifulSoup) -> str:
    """Return the visible text of ``soup`` (script/style tags are removed)."""
    for tag in soup(["script", "style"]):
        tag.decompose()
    return soup.get_text(separator="\n", strip=True)


def _collect_pages(soup: BeautifulSoup, base_url: str) -> list[tuple[str, str]]:
    """Return unique ``(href, absolute_url)`` pairs for relative links.

    Fragments are dropped, so links that only differ by ``#anchor`` map to a
    single page. The base page itself is excluded. Order follows the page.
    """
    seen = {urldefrag(base_url).url}
    pages = []
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        if not href or href.startswith(("#", "http")):
            continue
        href = urldefrag(href).url
        url = urljoin(base_url, href)
        # Skip non-web schemes such as mailto: that cannot be downloaded.
        if urlsplit(url).scheme not in ("http", "https") or url in seen:
            continue
        seen.add(url)
        pages.append((href, url))
    return pages


def _save_page(path: str, source_url: str, text: str) -> None:
    """Write ``text`` to ``path`` with a leading ``Source:`` header."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(f"Source: {source_url}\n\n{text}")


def prepare_vim_docs(
    base_url: str = VIM_DOCS_URL,
    content_dir: str = VIM_DOCS_DIR,
    timeout: float = REQUEST_TIMEOUT,
) -> list[str]:
    """Prepare Vim documentation for RAG.

    Downloads ``base_url`` and every page it links to with a relative link,
    and stores each page as a text file in ``content_dir``.

    Args:
        base_url: Entry page of the documentation.
        content_dir: Directory the text files are written to.
        timeout: Timeout in seconds for each HTTP request.

    Returns:
        Paths of all files written, the main page first.

    Raises:
        requests.RequestException: If the main page cannot be fetched.
            Failures on linked pages are printed and skipped.
    """
    print(colored("Starting Vim documentation preparation...", "yellow"))
    os.makedirs(content_dir, exist_ok=True)

    print(colored("Fetching main page...", "yellow"))
    soup = _fetch_soup(base_url, timeout)
    # Collect links before the text extraction mutates the parsed tree.
    pages = _collect_pages(soup, base_url)

    main_file = os.path.join(content_dir, "main.txt")
    _save_page(main_file, base_url, _soup_to_text(soup))
    # The main page is documentation too, so it is returned for indexing.
    processed_files = [main_file]

    total_links = len(pages)
    print(colored(f"Found {total_links} documentation pages to process", "yellow"))

    for idx, (href, full_url) in enumerate(pages, 1):
        try:
            print(colored(f"Processing page {idx}/{total_links}: {href}", "yellow"))
            content = _soup_to_text(_fetch_soup(full_url, timeout))
            filename = os.path.join(content_dir, f"{href.replace('/', '_')}.txt")
            _save_page(filename, full_url, content)
        except (requests.RequestException, OSError) as e:
            print(colored(f"✗ Error processing {full_url}: {e}", "red"))
        else:
            processed_files.append(filename)
            print(colored(f"✓ Saved {href}", "green"))

    print(colored("Documentation preparation completed!", "green"))
    return processed_files


def main() -> None:
    """Index the Vim docs and run task "0" with a RAG-enhanced agent."""
    print(colored("=== Starting RAG-enhanced benchmark ===", "cyan"))

    print(colored("\nInitializing benchmark environment...", "yellow"))
    benchmark = create_benchmark(template_benchmark_config)
    try:
        task, action_space = benchmark.start_task("0")
        env_descriptions = benchmark.get_env_descriptions()

        doc_files = prepare_vim_docs()

        print(colored("\nInitializing RAG model...", "yellow"))
        rag_model = CamelRAGModel(
            model="gpt-4o",
            model_platform=ModelPlatformType.OPENAI,
            parameters={"temperature": 0.7},
        )

        print(colored("Processing documents for RAG...", "yellow"))
        for doc_file in doc_files:
            print(colored(f"Processing {doc_file}...", "yellow"))
            rag_model.process_documents(doc_file)
        print(colored("RAG model initialization complete!", "green"))

        print(colored("\nSetting up agent...", "yellow"))
        agent = SingleAgentPolicy(model_backend=rag_model)
        agent.reset(task.description, action_space, env_descriptions)

        print(colored("\nStarting benchmark execution:", "cyan"))
        print("Start performing task: " + colored(f'"{task.description}"', "green"))
        start_benchmark(benchmark, agent)
    finally:
        # Reset even when the run fails so the benchmark is not left
        # mid-task.
        benchmark.reset()


if __name__ == "__main__":
    main()