12 changes: 8 additions & 4 deletions camel/benchmarks/apibank.py
@@ -21,10 +21,6 @@
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional

import numpy as np
from rouge import Rouge
from tqdm import tqdm

from camel.agents import ChatAgent
from camel.benchmarks.base import BaseBenchmark
from camel.messages import BaseMessage
@@ -138,6 +134,8 @@ def load(self, level: str, force_download: bool = False): # type: ignore[overri
jsonl_files = [
f for f in os.listdir(file_path) if f.endswith('.jsonl')
]
from tqdm import tqdm

for file in tqdm(jsonl_files, desc="Processing files"):
history = []
with open(file_path / file, 'r') as f:
@@ -240,6 +238,8 @@ def run( # type: ignore[override, return]
total_api_calls, correct_api_calls, rougel_scores = 0, 0, []

with open(self.save_to, "w") as f:
from tqdm import tqdm

for test in tqdm(datas, desc="Running"):
samples = self._data[test]
evaluator = Evaluator(samples) # type: ignore[arg-type]
@@ -380,6 +380,8 @@ def run( # type: ignore[override, return]
else 0,
}
elif dialog_test_enabled:
import numpy as np

return {'Dialog_score': np.mean(rougel_scores)}


@@ -415,6 +417,8 @@ def agent_call(messages: List[Dict], agent: ChatAgent):

def calculate_rouge_l_score(reference, hypothesis):
r"""Calculate rouge l score between hypothesis and reference."""
from rouge import Rouge

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
rouge_l_score = scores[0]['rouge-l']['f']
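The apibank.py hunks above show the deferred-import pattern applied throughout this PR: heavy optional dependencies (numpy, rouge, tqdm) move from module scope into the functions that use them, so importing camel.benchmarks no longer requires those packages to be installed. A minimal sketch of the idea, modeled on the ROUGE helper in this file (the standalone function below is illustrative, not part of the diff):

def calculate_rouge_l(reference: str, hypothesis: str) -> float:
    # Deferred import: the cost of loading `rouge` (and any ImportError)
    # is paid only when ROUGE scoring is actually requested.
    from rouge import Rouge

    scores = Rouge().get_scores(hypothesis, reference)
    return scores[0]["rouge-l"]["f"]

Callers that never score dialogs never touch the optional dependency, which keeps the module importable in minimal installs.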
9 changes: 5 additions & 4 deletions camel/benchmarks/apibench.py
@@ -18,10 +18,6 @@
from pathlib import Path
from typing import Any, Dict, Literal, Optional

import tree_sitter_python as tspython
from tqdm import tqdm
from tree_sitter import Language, Parser

from camel.agents import ChatAgent
from camel.benchmarks.base import BaseBenchmark
from camel.utils import download_github_subdirectory
@@ -278,6 +274,8 @@ def run( # type: ignore[override]
self._results = []

with open(self.save_to, "w") as f:
from tqdm import tqdm

for question in tqdm(datas, desc="Running"):
prompt = encode_question(question["text"], dataset_name)
try:
@@ -376,6 +374,9 @@ def get_all_sub_trees(root_node):

# Parse the program into AST trees
def ast_parse(candidate):
import tree_sitter_python as tspython
from tree_sitter import Language, Parser

PY_LANGUAGE = Language(tspython.language())
parser = Parser(PY_LANGUAGE)

4 changes: 2 additions & 2 deletions camel/benchmarks/gaia.py
@@ -22,8 +22,6 @@
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Protocol, Union

from tqdm import tqdm

from camel.agents import ChatAgent
from camel.benchmarks.base import BaseBenchmark
from camel.messages import BaseMessage
@@ -261,6 +259,8 @@ def run( # type: ignore[override]

# Process tasks
with open(self.save_to, "w") as f:
from tqdm import tqdm

for task in tqdm(datas, desc="Running"):
if not self._prepare_task(task):
continue
11 changes: 7 additions & 4 deletions camel/benchmarks/nexus.py
@@ -22,12 +22,9 @@
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

import pandas as pd
from datasets import load_dataset
from tqdm import tqdm

from camel.agents import ChatAgent
from camel.benchmarks.base import BaseBenchmark
from camel.utils import dependencies_required

logger = logging.getLogger(__name__)

@@ -89,6 +86,7 @@ class NexusBenchmark(BaseBenchmark):
(default: :obj:`1`)
"""

@dependencies_required('datasets', 'pandas')
def __init__(
self,
data_dir: str,
@@ -126,6 +124,7 @@ def load(self, dataset_name: str, force_download: bool = False): # type: ignore
dataset_name (str): Name of the specific dataset to be loaded.
force_download (bool): Whether to force download the data.
"""
import pandas as pd

def _load_csv_data(dataset_dir: Path) -> List:
r"""Load datasets from CSV files."""
@@ -306,6 +305,8 @@ def run( # type: ignore[override, return]
# Process samples
tools = construct_tool_descriptions(task)
with open(self.save_to, "w") as f:
from tqdm import tqdm

for sample in tqdm(datas, desc="Running"):
prompt = construct_prompt(input=sample.input, tools=tools)
ground_truth_call = sample.output
@@ -363,6 +364,8 @@ def construct_tool_descriptions(dataset_name: str) -> str:
def construct_tool_descriptions(dataset_name: str) -> str:
r"""Construct tool descriptions from function definitions and
descriptions."""
from datasets import load_dataset

tool_dataset_mapping = {
"NVDLibrary": "CVECPE",
"VirusTotal": "VirusTotal",
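In nexus.py the pandas and datasets imports are likewise deferred, and the constructor is additionally guarded with the dependencies_required decorator from camel.utils, so missing optional packages are reported when the benchmark is instantiated rather than when the module is imported. A rough sketch of how such a decorator can work; this is an illustration of the idea, not the camel.utils implementation:

import importlib.util
from functools import wraps

def dependencies_required(*modules: str):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Fail at call time, not import time, if optional packages are absent.
            missing = [m for m in modules if importlib.util.find_spec(m) is None]
            if missing:
                raise ImportError(
                    f"Missing optional dependencies: {', '.join(missing)}"
                )
            return func(*args, **kwargs)
        return wrapper
    return decorator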
39 changes: 29 additions & 10 deletions camel/benchmarks/ragbench.py
@@ -12,15 +12,25 @@
# limitations under the License.
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========

from typing import Any, Callable, Dict, List, Literal, Optional, Sequence

import numpy as np
from datasets import Dataset, load_dataset
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Literal,
Optional,
Sequence,
)

if TYPE_CHECKING:
from datasets import Dataset

from camel.agents import ChatAgent
from camel.benchmarks import BaseBenchmark
from camel.logger import get_logger
from camel.retrievers import AutoRetriever
from camel.utils import dependencies_required

logger = get_logger(__name__)

@@ -34,10 +44,10 @@ class RagasFields:


def annotate_dataset(
dataset: Dataset,
dataset: 'Dataset',
context_call: Optional[Callable[[Dict[str, Any]], List[str]]],
answer_call: Optional[Callable[[Dict[str, Any]], str]],
) -> Dataset:
) -> 'Dataset':
r"""Annotate the dataset by adding context and answers using the provided
functions.

@@ -75,6 +85,8 @@ def rmse(
Returns:
Optional[float]: RMSE value, or None if inputs have different lengths.
"""
import numpy as np

if len(input_trues) != len(input_preds):
logger.warning("Input lengths mismatch in RMSE calculation")
return None
@@ -104,6 +116,7 @@ def auroc(trues: Sequence[bool], preds: Sequence[float]) -> float:
Returns:
float: AUROC score.
"""
import numpy as np
from sklearn.metrics import roc_auc_score # type: ignore[import-untyped]

eval_idx = ~np.isnan(preds)
@@ -117,7 +130,7 @@


def ragas_calculate_metrics(
dataset: Dataset,
dataset: 'Dataset',
pred_context_relevance_field: Optional[str],
pred_faithfulness_field: Optional[str],
metrics_to_evaluate: Optional[List[str]] = None,
@@ -141,6 +154,8 @@
Returns:
Dict[str, Optional[float]]: Dictionary of calculated metrics.
"""
import numpy as np

metrics_to_evaluate = metrics_to_evaluate or [
"context_relevancy",
"faithfulness",
@@ -172,11 +187,11 @@


def ragas_evaluate_dataset(
dataset: Dataset,
dataset: 'Dataset',
contexts_field_name: Optional[str],
answer_field_name: Optional[str],
metrics_to_evaluate: Optional[List[str]] = None,
) -> Dataset:
) -> 'Dataset':
r"""Evaluate the dataset using RAGAS metrics.

Args:
@@ -188,6 +203,7 @@
Returns:
Dataset: Dataset with added evaluation metrics.
"""
from datasets import Dataset
from ragas import evaluate # type: ignore[import]
from ragas.metrics import ( # type: ignore[import]
context_relevancy,
@@ -236,6 +252,7 @@ class RAGBenchBenchmark(BaseBenchmark):
split (str, optional): Dataset split to use (e.g., "test").
"""

@dependencies_required('datasets')
def __init__(
self,
processes: int = 1,
@@ -258,10 +275,12 @@ def __init__(
super().__init__("ragbench", "rag_bench", "", processes)
self.subset = subset
self.split = split
self.dataset: Optional[Dataset] = None
self.dataset: Optional['Dataset'] = None

def download(self):
r"""Download the RAGBench dataset."""
from datasets import load_dataset

try:
self.dataset = load_dataset(
"rungalileo/ragbench", self.subset, split=self.split
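ragbench.py combines two pieces: runtime imports of numpy, datasets, and ragas are deferred into the functions that need them, while the Dataset name used in annotations is imported only under typing.TYPE_CHECKING and the annotations are quoted, so type checkers still see the real type without datasets becoming a hard runtime dependency. A minimal sketch of that pattern; the helper function below is hypothetical:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers; never imported at runtime.
    from datasets import Dataset


def first_rows(dataset: 'Dataset', n: int = 5) -> 'Dataset':
    # Quoted annotations keep the signature precise for mypy while the
    # module stays importable without the `datasets` package installed.
    return dataset.select(range(n))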
4 changes: 2 additions & 2 deletions camel/datagen/evol_instruct/evol_instruct.py
@@ -18,8 +18,6 @@
from math import ceil
from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast

from tqdm import tqdm

from camel.agents import ChatAgent
from camel.datagen.evol_instruct.scorer import BaseScorer, GeneralScorer
from camel.datagen.evol_instruct.templates import EvolInstructTemplates
@@ -413,6 +411,8 @@ def _process_prompt(
plan_chunk = evolution_plan[chunk_idx : chunk_idx + chunk_size]

with ThreadPoolExecutor(max_workers=num_threads) as executor:
from tqdm import tqdm

chunk_results = list(
tqdm(
executor.map(_process_prompt, zip(chunk, plan_chunk)),
4 changes: 2 additions & 2 deletions camel/datagen/self_instruct/filter/filter_function.py
@@ -16,8 +16,6 @@
from abc import ABC, abstractmethod
from typing import List

from rouge import Rouge

from camel.models.reward import BaseRewardModel


@@ -136,6 +134,8 @@ class RougeSimilarityFilter(FilterFunction):
def __init__(
self, existing_instructions: List[str], threshold: float = 0.7
):
from rouge import Rouge

self.existing_instructions = existing_instructions
self.threshold = threshold
self.rouge = Rouge()
4 changes: 2 additions & 2 deletions camel/datagen/source2synth/data_processor.py
@@ -15,8 +15,6 @@
import random
from typing import Any, Dict, List, Optional, Sequence

from tqdm import tqdm

from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
from camel.datagen.source2synth.user_data_processor_config import (
ProcessorConfig,
@@ -172,6 +170,8 @@ def construct_examples(
logger.info("Starting to construct training examples...")
examples = []

from tqdm import tqdm

for data in tqdm(raw_data, desc="Constructing examples"):
# 1. Text preprocessing
processed_text = self._preprocess_text(data.get('text', ''))
9 changes: 8 additions & 1 deletion camel/datasets/base_generator.py
@@ -20,7 +20,14 @@
from typing import Any, Dict, List, Union

from pydantic import ValidationError
from torch.utils.data import IterableDataset

try:
from torch.utils.data import IterableDataset
except ImportError:

class IterableDataset: # type: ignore[no-redef]
r"""Fallback when PyTorch is not installed."""
pass

from camel.logger import get_logger

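base_generator.py takes a different route for torch: the import is attempted once at module level and, if PyTorch is absent, a stub IterableDataset class is defined in its place so subclasses can still be declared and imported. A small sketch of how a subclass keeps working either way; the generator class below is hypothetical:

try:
    from torch.utils.data import IterableDataset
except ImportError:
    class IterableDataset:  # type: ignore[no-redef]
        # Stub base class: lets subclasses be defined without PyTorch;
        # torch-specific features simply stay unavailable.
        pass


class SyntheticQAGenerator(IterableDataset):
    # Works with or without torch installed, because the base name
    # always resolves to something.
    def __iter__(self):
        yield {"question": "2 + 2 = ?", "answer": "4"}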
18 changes: 15 additions & 3 deletions camel/datasets/static_dataset.py
@@ -16,6 +16,7 @@
from collections.abc import Sequence
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Expand All @@ -24,9 +25,20 @@
Union,
)

from datasets import Dataset as HFDataset
from pydantic import ValidationError
from torch.utils.data import Dataset

try:
from datasets import Dataset as HFDataset
except ImportError:
HFDataset = None # type: ignore[assignment,misc]

try:
from torch.utils.data import Dataset
except ImportError:

class Dataset: # type: ignore[no-redef]
r"""Fallback when PyTorch is not installed."""
pass

from camel.logger import get_logger

@@ -116,7 +128,7 @@ def _init_data(
ValueError: If the Path has an unsupported file extension.
"""

if isinstance(data, HFDataset):
if HFDataset is not None and isinstance(data, HFDataset):
raw_data = self._init_from_hf_dataset(data)
elif isinstance(data, Dataset):
raw_data = self._init_from_pytorch_dataset(data)
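static_dataset.py uses the other common fallback: when datasets is missing, HFDataset is bound to None instead of a stub class, so every isinstance check against it must be guarded first, since isinstance(x, None) raises a TypeError. A sketch of that guard, with a hypothetical normalize helper:

try:
    from datasets import Dataset as HFDataset
except ImportError:
    HFDataset = None  # sentinel: the optional dependency is unavailable


def normalize(data):
    # Check the sentinel before isinstance: passing None as the second
    # argument to isinstance would raise TypeError.
    if HFDataset is not None and isinstance(data, HFDataset):
        return [dict(row) for row in data]  # illustrative conversion
    return data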