Fix: Gaia bench update (#3482)

hesamsheikh · Wendong-Fan · web-flow · commit 348d9a2bfb18 · 2025-12-02T17:04:02.000+08:00
Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com>
diff --git a/camel/benchmarks/gaia.py b/camel/benchmarks/gaia.py
@@ -165,6 +165,8 @@ def load(self, force_download=False):
             force_download (bool, optional): Whether to
                 force download the data.
         """
+        import pandas as pd
+
         if force_download:
             logger.info("Force downloading data.")
             self.download()
@@ -181,15 +183,17 @@ def load(self, force_download=False):
         # Load metadata for both validation and test datasets
         for path, label in zip([valid_dir, test_dir], ["valid", "test"]):
             self._data[label] = []
-            with open(path / "metadata.jsonl", "r") as f:
-                lines = f.readlines()
-                for line in lines:
-                    data = json.loads(line)
-                    if data["task_id"] == "0-0-0-0-0":
-                        continue
-                    if data["file_name"]:
-                        data["file_name"] = path / data["file_name"]
-                    self._data[label].append(data)
+            metadata_file = path / "metadata.parquet"
+            df = pd.read_parquet(metadata_file)
+            for _, row in df.iterrows():
+                data = row.to_dict()
+                if data["task_id"] == "0-0-0-0-0":
+                    continue
+                # convert level to int (parquet stores as string)
+                data["Level"] = int(data["Level"])
+                if data["file_name"]:
+                    data["file_name"] = path / data["file_name"]
+                self._data[label].append(data)
         return self
 
     @property
@@ -333,7 +337,7 @@ def _process_result(
         }
         self._results.append(result_data)
         file_obj.write(
-            json.dumps(result_data, indent=2) + "\n", ensure_ascii=False
+            json.dumps(result_data, indent=2, ensure_ascii=False) + "\n"
         )
         file_obj.flush()
 
@@ -354,7 +358,7 @@ def _handle_error(
         }
         self._results.append(error_data)
         file_obj.write(
-            json.dumps(error_data, indent=2) + "\n", ensure_ascii=False
+            json.dumps(error_data, indent=2, ensure_ascii=False) + "\n"
         )
         file_obj.flush()
 
diff --git a/camel/toolkits/pptx_toolkit.py b/camel/toolkits/pptx_toolkit.py
@@ -680,8 +680,8 @@ def _handle_step_by_step_process(
             slide_width_inch (float): The width of the slide in inches.
             slide_height_inch (float): The height of the slide in inches.
         """
-        import pptx
         from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
+        from pptx.enum.text import MSO_ANCHOR, PP_ALIGN
         from pptx.util import Inches, Pt
 
         steps = slide_json['bullet_points']
@@ -710,8 +710,8 @@ def _handle_step_by_step_process(
                 text_frame = shape.text_frame
                 text_frame.clear()
                 paragraph = text_frame.paragraphs[0]
-                paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER
-                text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
+                paragraph.alignment = PP_ALIGN.CENTER
+                text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
                 self._format_text(
                     paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
                 )
@@ -732,8 +732,8 @@ def _handle_step_by_step_process(
                 text_frame = shape.text_frame
                 text_frame.clear()
                 paragraph = text_frame.paragraphs[0]
-                paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER
-                text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
+                paragraph.alignment = PP_ALIGN.CENTER
+                text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
                 self._format_text(
                     paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
                 )
diff --git a/camel/toolkits/resend_toolkit.py b/camel/toolkits/resend_toolkit.py
@@ -12,7 +12,7 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
-from typing import Dict, List, Optional, cast
+from typing import Dict, List, Optional
 
 from camel.toolkits.base import BaseToolkit
 from camel.toolkits.function_tool import FunctionTool
@@ -141,7 +141,7 @@ def send_email(
         if reply_to:
             params["reply_to"] = reply_to
         if tags:
-            params["tags"] = cast('list[resend.emails._tag.Tag]', tags)
+            params["tags"] = tags  # type: ignore[typeddict-item]
         if headers:
             params["headers"] = headers
 
diff --git a/examples/benchmarks/gaia.py b/examples/benchmarks/gaia.py
@@ -11,17 +11,39 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+"""
+GAIA Benchmark Example
+
+Prerequisites:
+1. Docker Desktop installed and running
+2. Build the Docker image (one-time setup):
+   cd examples/runtimes/ubuntu_docker_runtime
+   ./manage_camel_docker.sh build
+
+3. Set environment variables in .env:
+   - OPENAI_API_KEY (or other API keys)
+
+4. Clean up stale containers if needed:
+   docker stop $(docker ps -q) && docker rm $(docker ps -aq)
+"""
 
+from dotenv import load_dotenv
 
 from camel.agents import ChatAgent
 from camel.benchmarks import DefaultGAIARetriever, GAIABenchmark
+from camel.configs import ChatGPTConfig
+from camel.embeddings import AzureEmbedding
 from camel.models import ModelFactory
-from camel.runtimes import RemoteHttpRuntime
+from camel.runtimes import DockerRuntime
 from camel.toolkits import CodeExecutionToolkit
 from camel.types import ModelPlatformType, ModelType, StorageType
 
+load_dotenv()
+
 retriever = DefaultGAIARetriever(
-    vector_storage_local_path="local_data2/", storage_type=StorageType.QDRANT
+    vector_storage_local_path="local_data2/",
+    storage_type=StorageType.QDRANT,
+    embedding_model=AzureEmbedding(),
 )
 
 benchmark = GAIABenchmark(
@@ -36,9 +58,12 @@
 
 
 toolkit = CodeExecutionToolkit(verbose=True)
-runtime = RemoteHttpRuntime("localhost").add(
+runtime = DockerRuntime(
+    "my-camel", port=0
+).add(  # port=0 uses random available port
     toolkit.get_tools(),
     "camel.toolkits.CodeExecutionToolkit",
+    dict(verbose=True),
 )
 
 task_prompt = """
@@ -57,23 +82,27 @@
         a string.
         """.strip()
 
-tools = runtime.get_tools()
-
 model = ModelFactory.create(
     model_platform=ModelPlatformType.DEFAULT,
     model_type=ModelType.DEFAULT,
+    model_config_dict=ChatGPTConfig().as_dict(),
 )
 
+# use context manager to auto-cleanup container on exit
+with runtime as r:
+    r.wait()
+    print("Docker runtime is ready.")
 
-agent = ChatAgent(
-    task_prompt,
-    model,
-    tools=tools,
-)
+    tools = r.get_tools()
+    agent = ChatAgent(
+        task_prompt,
+        model,
+        tools=tools,
+    )
 
-result = benchmark.run(agent, "valid", level="all", subset=3)
-print("correct:", result["correct"])
-print("total:", result["total"])
+    result = benchmark.run(agent, "valid", level="all", subset=10)
+    print("correct:", result["correct"])
+    print("total:", result["total"])
 
 # ruff: noqa: E501
 """
diff --git a/examples/runtimes/ubuntu_docker_runtime/Dockerfile b/examples/runtimes/ubuntu_docker_runtime/Dockerfile
@@ -24,7 +24,7 @@ RUN pip3 install -e ".[all]"
 
 # Copy API service file
 RUN mkdir -p /home
-COPY camel/runtime/api.py /home/api.py
+COPY camel/runtimes/api.py /home/api.py
 
 # Set Python path and other environment variables
 ENV PYTHONPATH=/app/camel
diff --git a/examples/runtimes/ubuntu_docker_runtime/manage_camel_docker.sh b/examples/runtimes/ubuntu_docker_runtime/manage_camel_docker.sh
@@ -35,18 +35,18 @@ build_image() {
     cp -r "$CAMEL_ROOT" "$TEMP_DIR/camel_source"
     
     # Ensure API file exists
-    if [ ! -f "$CAMEL_ROOT/camel/runtime/api.py" ]; then
-        echo "Error: API file not found at $CAMEL_ROOT/camel/runtime/api.py"
+    if [ ! -f "$CAMEL_ROOT/camel/runtimes/api.py" ]; then
+        echo "Error: API file not found at $CAMEL_ROOT/camel/runtimes/api.py"
         exit 1
     fi
-    
+
     # Copy API file to a known location
     mkdir -p "$TEMP_DIR/api"
-    cp "$CAMEL_ROOT/camel/runtime/api.py" "$TEMP_DIR/api/"
-    
+    cp "$CAMEL_ROOT/camel/runtimes/api.py" "$TEMP_DIR/api/"
+
     # Modify Dockerfile COPY commands - fix the sed command
     sed -i '' 's|COPY ../../../|COPY camel_source/|g' "$TEMP_DIR/Dockerfile"
-    sed -i '' 's|COPY camel/runtime/api.py|COPY api/api.py|g' "$TEMP_DIR/Dockerfile"
+    sed -i '' 's|COPY camel/runtimes/api.py|COPY api/api.py|g' "$TEMP_DIR/Dockerfile"
     
     # Build in temporary directory
     (cd "$TEMP_DIR" && docker build -t ${FULL_NAME} .)