|
11 | 11 | # See the License for the specific language governing permissions and |
12 | 12 | # limitations under the License. |
13 | 13 | # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= |
| 14 | +""" |
| 15 | +GAIA Benchmark Example |
| 16 | +
|
| 17 | +Prerequisites: |
| 18 | +1. Docker Desktop installed and running |
| 19 | +2. Build the Docker image (one-time setup): |
| 20 | + cd examples/runtimes/ubuntu_docker_runtime |
| 21 | + ./manage_camel_docker.sh build |
| 22 | +
|
| 23 | +3. Set environment variables in .env: |
| 24 | + - OPENAI_API_KEY (or other API keys) |
| 25 | +
|
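| 26 | +4. Clean up stale containers if needed (WARNING: the command below stops and removes ALL Docker containers on this host, not only the ones created by this example): |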
| 27 | + docker stop $(docker ps -q) && docker rm $(docker ps -aq) |
| 28 | +""" |
14 | 29 |
|
| 30 | +from dotenv import load_dotenv |
15 | 31 |
|
16 | 32 | from camel.agents import ChatAgent |
17 | 33 | from camel.benchmarks import DefaultGAIARetriever, GAIABenchmark |
| 34 | +from camel.configs import ChatGPTConfig |
| 35 | +from camel.embeddings import AzureEmbedding |
18 | 36 | from camel.models import ModelFactory |
19 | | -from camel.runtimes import RemoteHttpRuntime |
| 37 | +from camel.runtimes import DockerRuntime |
20 | 38 | from camel.toolkits import CodeExecutionToolkit |
21 | 39 | from camel.types import ModelPlatformType, ModelType, StorageType |
22 | 40 |
|
| 41 | +load_dotenv() |
| 42 | + |
23 | 43 | retriever = DefaultGAIARetriever( |
24 | | - vector_storage_local_path="local_data2/", storage_type=StorageType.QDRANT |
| 44 | + vector_storage_local_path="local_data2/", |
| 45 | + storage_type=StorageType.QDRANT, |
| 46 | + embedding_model=AzureEmbedding(), |
25 | 47 | ) |
26 | 48 |
|
27 | 49 | benchmark = GAIABenchmark( |
|
36 | 58 |
|
37 | 59 |
|
38 | 60 | toolkit = CodeExecutionToolkit(verbose=True) |
39 | | -runtime = RemoteHttpRuntime("localhost").add( |
| 61 | +runtime = DockerRuntime( |
| 62 | + "my-camel", port=0 |
| 63 | +).add( # port=0 above uses a random available port |
40 | 64 | toolkit.get_tools(), |
41 | 65 | "camel.toolkits.CodeExecutionToolkit", |
| 66 | + dict(verbose=True), |
42 | 67 | ) |
43 | 68 |
|
44 | 69 | task_prompt = """ |
|
57 | 82 | a string. |
58 | 83 | """.strip() |
59 | 84 |
|
60 | | -tools = runtime.get_tools() |
61 | | - |
62 | 85 | model = ModelFactory.create( |
63 | 86 | model_platform=ModelPlatformType.DEFAULT, |
64 | 87 | model_type=ModelType.DEFAULT, |
| 88 | + model_config_dict=ChatGPTConfig().as_dict(), |
65 | 89 | ) |
66 | 90 |
|
| 91 | +# Use a context manager so the container is cleaned up automatically on exit. |
| 92 | +with runtime as r: |
| 93 | + r.wait() |
| 94 | + print("Docker runtime is ready.") |
67 | 95 |
|
68 | | -agent = ChatAgent( |
69 | | - task_prompt, |
70 | | - model, |
71 | | - tools=tools, |
72 | | -) |
| 96 | + tools = r.get_tools() |
| 97 | + agent = ChatAgent( |
| 98 | + task_prompt, |
| 99 | + model, |
| 100 | + tools=tools, |
| 101 | + ) |
73 | 102 |
|
74 | | -result = benchmark.run(agent, "valid", level="all", subset=3) |
75 | | -print("correct:", result["correct"]) |
76 | | -print("total:", result["total"]) |
| 103 | + result = benchmark.run(agent, "valid", level="all", subset=10) |
| 104 | + print("correct:", result["correct"]) |
| 105 | + print("total:", result["total"]) |
77 | 106 |
|
78 | 107 | # ruff: noqa: E501 |
79 | 108 | """ |
|
0 commit comments