Skip to content

Commit 348d9a2

Browse files
Fix: Gaia bench update (#3482)
Co-authored-by: Wendong-Fan <[email protected]>
1 parent 4f0a4bf commit 348d9a2

File tree

6 files changed

+71
-38
lines changed

6 files changed

+71
-38
lines changed

camel/benchmarks/gaia.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ def load(self, force_download=False):
165165
force_download (bool, optional): Whether to
166166
force download the data.
167167
"""
168+
import pandas as pd
169+
168170
if force_download:
169171
logger.info("Force downloading data.")
170172
self.download()
@@ -181,15 +183,17 @@ def load(self, force_download=False):
181183
# Load metadata for both validation and test datasets
182184
for path, label in zip([valid_dir, test_dir], ["valid", "test"]):
183185
self._data[label] = []
184-
with open(path / "metadata.jsonl", "r") as f:
185-
lines = f.readlines()
186-
for line in lines:
187-
data = json.loads(line)
188-
if data["task_id"] == "0-0-0-0-0":
189-
continue
190-
if data["file_name"]:
191-
data["file_name"] = path / data["file_name"]
192-
self._data[label].append(data)
186+
metadata_file = path / "metadata.parquet"
187+
df = pd.read_parquet(metadata_file)
188+
for _, row in df.iterrows():
189+
data = row.to_dict()
190+
if data["task_id"] == "0-0-0-0-0":
191+
continue
192+
# Convert "Level" to int (the parquet metadata stores it as a string)
193+
data["Level"] = int(data["Level"])
194+
if data["file_name"]:
195+
data["file_name"] = path / data["file_name"]
196+
self._data[label].append(data)
193197
return self
194198

195199
@property
@@ -333,7 +337,7 @@ def _process_result(
333337
}
334338
self._results.append(result_data)
335339
file_obj.write(
336-
json.dumps(result_data, indent=2) + "\n", ensure_ascii=False
340+
json.dumps(result_data, indent=2, ensure_ascii=False) + "\n"
337341
)
338342
file_obj.flush()
339343

@@ -354,7 +358,7 @@ def _handle_error(
354358
}
355359
self._results.append(error_data)
356360
file_obj.write(
357-
json.dumps(error_data, indent=2) + "\n", ensure_ascii=False
361+
json.dumps(error_data, indent=2, ensure_ascii=False) + "\n"
358362
)
359363
file_obj.flush()
360364

camel/toolkits/pptx_toolkit.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,8 @@ def _handle_step_by_step_process(
680680
slide_width_inch (float): The width of the slide in inches.
681681
slide_height_inch (float): The height of the slide in inches.
682682
"""
683-
import pptx
684683
from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
684+
from pptx.enum.text import MSO_ANCHOR, PP_ALIGN
685685
from pptx.util import Inches, Pt
686686

687687
steps = slide_json['bullet_points']
@@ -710,8 +710,8 @@ def _handle_step_by_step_process(
710710
text_frame = shape.text_frame
711711
text_frame.clear()
712712
paragraph = text_frame.paragraphs[0]
713-
paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER
714-
text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
713+
paragraph.alignment = PP_ALIGN.CENTER
714+
text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
715715
self._format_text(
716716
paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
717717
)
@@ -732,8 +732,8 @@ def _handle_step_by_step_process(
732732
text_frame = shape.text_frame
733733
text_frame.clear()
734734
paragraph = text_frame.paragraphs[0]
735-
paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER
736-
text_frame.vertical_anchor = pptx.enum.text.MSO_ANCHOR.MIDDLE
735+
paragraph.alignment = PP_ALIGN.CENTER
736+
text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
737737
self._format_text(
738738
paragraph, step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
739739
)

camel/toolkits/resend_toolkit.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# limitations under the License.
1313
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414

15-
from typing import Dict, List, Optional, cast
15+
from typing import Dict, List, Optional
1616

1717
from camel.toolkits.base import BaseToolkit
1818
from camel.toolkits.function_tool import FunctionTool
@@ -141,7 +141,7 @@ def send_email(
141141
if reply_to:
142142
params["reply_to"] = reply_to
143143
if tags:
144-
params["tags"] = cast('list[resend.emails._tag.Tag]', tags)
144+
params["tags"] = tags # type: ignore[typeddict-item]
145145
if headers:
146146
params["headers"] = headers
147147

examples/benchmarks/gaia.py

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,39 @@
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
1313
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14+
"""
15+
GAIA Benchmark Example
16+
17+
Prerequisites:
18+
1. Docker Desktop installed and running
19+
2. Build the Docker image (one-time setup):
20+
cd examples/runtimes/ubuntu_docker_runtime
21+
./manage_camel_docker.sh build
22+
23+
3. Set environment variables in .env:
24+
- OPENAI_API_KEY (or other API keys)
25+
26+
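4. Clean up stale containers if needed (WARNING: the command below stops and removes ALL Docker containers on the host, not only CAMEL ones):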
27+
docker stop $(docker ps -q) && docker rm $(docker ps -aq)
28+
"""
1429

30+
from dotenv import load_dotenv
1531

1632
from camel.agents import ChatAgent
1733
from camel.benchmarks import DefaultGAIARetriever, GAIABenchmark
34+
from camel.configs import ChatGPTConfig
35+
from camel.embeddings import AzureEmbedding
1836
from camel.models import ModelFactory
19-
from camel.runtimes import RemoteHttpRuntime
37+
from camel.runtimes import DockerRuntime
2038
from camel.toolkits import CodeExecutionToolkit
2139
from camel.types import ModelPlatformType, ModelType, StorageType
2240

41+
load_dotenv()
42+
2343
retriever = DefaultGAIARetriever(
24-
vector_storage_local_path="local_data2/", storage_type=StorageType.QDRANT
44+
vector_storage_local_path="local_data2/",
45+
storage_type=StorageType.QDRANT,
46+
embedding_model=AzureEmbedding(),
2547
)
2648

2749
benchmark = GAIABenchmark(
@@ -36,9 +58,12 @@
3658

3759

3860
toolkit = CodeExecutionToolkit(verbose=True)
39-
runtime = RemoteHttpRuntime("localhost").add(
61+
runtime = DockerRuntime(
62+
"my-camel", port=0
63+
).add( # port=0 uses random available port
4064
toolkit.get_tools(),
4165
"camel.toolkits.CodeExecutionToolkit",
66+
dict(verbose=True),
4267
)
4368

4469
task_prompt = """
@@ -57,23 +82,27 @@
5782
a string.
5883
""".strip()
5984

60-
tools = runtime.get_tools()
61-
6285
model = ModelFactory.create(
6386
model_platform=ModelPlatformType.DEFAULT,
6487
model_type=ModelType.DEFAULT,
88+
model_config_dict=ChatGPTConfig().as_dict(),
6589
)
6690

91+
# Use the runtime as a context manager so the container is cleaned up automatically on exit
92+
with runtime as r:
93+
r.wait()
94+
print("Docker runtime is ready.")
6795

68-
agent = ChatAgent(
69-
task_prompt,
70-
model,
71-
tools=tools,
72-
)
96+
tools = r.get_tools()
97+
agent = ChatAgent(
98+
task_prompt,
99+
model,
100+
tools=tools,
101+
)
73102

74-
result = benchmark.run(agent, "valid", level="all", subset=3)
75-
print("correct:", result["correct"])
76-
print("total:", result["total"])
103+
result = benchmark.run(agent, "valid", level="all", subset=10)
104+
print("correct:", result["correct"])
105+
print("total:", result["total"])
77106

78107
# ruff: noqa: E501
79108
"""

examples/runtimes/ubuntu_docker_runtime/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ RUN pip3 install -e ".[all]"
2424

2525
# Copy API service file
2626
RUN mkdir -p /home
27-
COPY camel/runtime/api.py /home/api.py
27+
COPY camel/runtimes/api.py /home/api.py
2828

2929
# Set Python path and other environment variables
3030
ENV PYTHONPATH=/app/camel

examples/runtimes/ubuntu_docker_runtime/manage_camel_docker.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,18 @@ build_image() {
3535
cp -r "$CAMEL_ROOT" "$TEMP_DIR/camel_source"
3636

3737
# Ensure API file exists
38-
if [ ! -f "$CAMEL_ROOT/camel/runtime/api.py" ]; then
39-
echo "Error: API file not found at $CAMEL_ROOT/camel/runtime/api.py"
38+
if [ ! -f "$CAMEL_ROOT/camel/runtimes/api.py" ]; then
39+
echo "Error: API file not found at $CAMEL_ROOT/camel/runtimes/api.py"
4040
exit 1
4141
fi
42-
42+
4343
# Copy API file to a known location
4444
mkdir -p "$TEMP_DIR/api"
45-
cp "$CAMEL_ROOT/camel/runtime/api.py" "$TEMP_DIR/api/"
46-
45+
cp "$CAMEL_ROOT/camel/runtimes/api.py" "$TEMP_DIR/api/"
46+
4747
# Modify Dockerfile COPY commands - fix the sed command
4848
sed -i '' 's|COPY ../../../|COPY camel_source/|g' "$TEMP_DIR/Dockerfile"
49-
sed -i '' 's|COPY camel/runtime/api.py|COPY api/api.py|g' "$TEMP_DIR/Dockerfile"
49+
sed -i '' 's|COPY camel/runtimes/api.py|COPY api/api.py|g' "$TEMP_DIR/Dockerfile"
5050

5151
# Build in temporary directory
5252
(cd "$TEMP_DIR" && docker build -t ${FULL_NAME} .)

0 commit comments

Comments
 (0)