1 change: 1 addition & 0 deletions test/common/llmperf/run_inference.py
@@ -190,6 +190,7 @@ def run_mindie(args, prefill_input):
    except Exception as e:
        print(f"[Warning] Case {ROUND_COUNTER} failed: {e}")
        failed_case.append(ROUND_COUNTER)
        raise e

    return summary, failed_case

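The added `raise e` makes a failed round both recorded and fatal: the warning and the `failed_case` entry are kept for the summary, while the exception still propagates so the run cannot silently report a truncated result. A minimal sketch of this record-then-re-raise pattern (`run_case` is an illustrative name; a bare `raise` is the traceback-preserving equivalent of the hunk's `raise e`):

```python
failed_case = []

def run_case(round_counter: int) -> None:
    try:
        ...  # stand-in for one inference round
    except Exception as e:
        # Record the failure for the final summary...
        print(f"[Warning] Case {round_counter} failed: {e}")
        failed_case.append(round_counter)
        # ...then re-raise so the caller still sees it.
        raise
```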
2 changes: 1 addition & 1 deletion test/common/uc_eval/README.md
@@ -48,7 +48,7 @@

- Purpose: used when computing the f1-score
- **Download**: [GitHub - goto456/stopwords: common Chinese stopword lists (HIT stopword list, Baidu stopword list, etc.)](https://github.com/goto456/stopwords)
- **Placement**: taking the `cn_stopwords.txt` file as an example, place the downloaded file in the `test/common/uc_eval/utils` directory and rename it to `stopwords.txt`
- **Config file path**: taking the `cn_stopwords.txt` file as an example, point the `dataset.stopwords` entry of the config file `test/config.yaml` at its location (see the snippet below)
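A matching `test/config.yaml` entry, mirroring the `dataset` block added elsewhere in this PR (the path is an example location):

```yaml
dataset:
  stopwords: "/mnt/private/dataset/uc-eval/cn_stopwords.txt"
```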

## Logging Configuration

8 changes: 4 additions & 4 deletions test/common/uc_eval/utils/client.py
@@ -96,7 +96,7 @@ def create_session(self, max_parallel_num: int):
        return session

    def handle_requests_with_pool(
        self, prompt_list: List, parallel_num: int, max_tokens: int
        self, prompt_list: List, parallel_num: int, max_tokens: Optional[int]
    ) -> List[RequestRecord]:
        return _excute_with_pool(
            task_func=lambda prompt: self.send_request(prompt, max_tokens),
@@ -117,14 +117,14 @@ def send_request(self, prompt, max_tokens) -> List[RequestRecord]:
        record = self.do_request(payload, record)
        return record

    def _update_payload(self, prompt, max_tokens) -> Dict:
    def _update_payload(self, prompt, max_tokens: Optional[int]) -> Dict:
        """
        update request payload
        """
        payload = copy.deepcopy(self.payload)
        payload.update({"model": self.served_model_name})
        # If payload already has default max_tokens, the input max_tokens will be set to 0
        if max_tokens > 0:
        # `max_tokens` may be None when the payload does not define this key.
        if max_tokens is not None and max_tokens > 0:
            payload.update({"max_tokens": max_tokens})
        if isinstance(prompt, str):
            # If the length of input_ids is greater than max_seq_length, we need to split it
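A self-contained sketch of the `Optional[int]` guard this hunk introduces; the default payload here is illustrative, not the client's actual one. Without the `is not None` check, a `None` value would raise `TypeError` on the `> 0` comparison:

```python
import copy
from typing import Dict, Optional

DEFAULT_PAYLOAD: Dict = {"max_tokens": 512}  # illustrative default

def update_payload(max_tokens: Optional[int]) -> Dict:
    payload = copy.deepcopy(DEFAULT_PAYLOAD)
    # Only a concrete positive value overrides the payload's default;
    # None (key not configured) and 0 both leave it untouched.
    if max_tokens is not None and max_tokens > 0:
        payload["max_tokens"] = max_tokens
    return payload

assert update_payload(None)["max_tokens"] == 512
assert update_payload(100)["max_tokens"] == 100
```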
7 changes: 6 additions & 1 deletion test/common/uc_eval/utils/metric.py
@@ -8,9 +8,14 @@

import jieba
import numpy as np
from common.config_utils import config_utils as config_instance
from common.uc_eval.utils.data_class import MultiTurnDialogRecord, RequestRecord

stopwords_path = Path(__file__).parent.joinpath("stopwords.txt")
stopwords_path = Path(
    config_instance.get_nested_config(
        "dataset.stopwords", "/mnt/private/dataset/uc-eval/cn_stopwords.txt"
    )
)
STOPWORDS: List[str] = [
    line.strip() for line in stopwords_path.open("r", encoding="utf-8").readlines()
]
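Here `get_nested_config` is assumed to resolve a dot-separated key against the parsed YAML and fall back to its second argument when any level is missing; a sketch of that assumed behavior (not the actual `config_utils` implementation):

```python
from typing import Any, Dict

def get_nested_config(config: Dict[str, Any], key: str, default: Any = None) -> Any:
    """Walk a dot-separated key such as 'dataset.stopwords', with a fallback."""
    node: Any = config
    for part in key.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]
    return node

cfg = {"dataset": {"stopwords": "/mnt/private/dataset/uc-eval/cn_stopwords.txt"}}
assert get_nested_config(cfg, "dataset.stopwords", "x").endswith("cn_stopwords.txt")
assert get_nested_config(cfg, "dataset.missing", "fallback.txt") == "fallback.txt"
```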
4 changes: 4 additions & 0 deletions test/config.yaml
@@ -46,6 +46,10 @@ llm_connection:
  timeout: 180 # request timeout, default 180
  extra_info: "vllm_qwen3-32b_pc-gsa" # extra info, used to tag different service launch parameters

dataset:
  data_file: "/mnt/private/dataset/uc-eval/multifieldqa_zh.jsonl"
  stopwords: "/mnt/private/dataset/uc-eval/cn_stopwords.txt"

# Environment Pre-Check Configuration
Env_preCheck:
  master_ip: 192.168.0.1
28 changes: 18 additions & 10 deletions test/suites/E2E/test_model_validate.py
@@ -12,7 +12,9 @@
from common.uc_eval.utils.data_class import EvalConfig, ModelConfig

# Global test configuration constants
DATA_FILE_PATH = "common/uc_eval/utils/multifieldqa_zh.jsonl"
DATA_FILE_PATH = config_instance.get_nested_config(
    "dataset.data_file", "/mnt/private/dataset/uc-eval/multifieldqa_zh.jsonl"
)
MEAN_INPUT_TOKENS = 8000
MEAN_OUTPUT_TOKENS = 200
MAX_NUM_COMPLETED_REQUESTS = 8
@@ -111,15 +111,21 @@ def test_model_validate_sparse(self, model_config: ModelConfig) -> None:

    def _run_perf_test(self, hit_rates: List[int]) -> List[Dict[str, Any]]:
        """Run inference under specified cache hit rates and extract performance metrics."""
        n = len(hit_rates)
        all_summaries = inference_results(
            mean_input_tokens=[MEAN_INPUT_TOKENS] * n,
            mean_output_tokens=[MEAN_OUTPUT_TOKENS] * n,
            max_num_completed_requests=[MAX_NUM_COMPLETED_REQUESTS] * n,
            concurrent_requests=[CONCURRENT_REQUESTS] * n,
            additional_sampling_params=[ADDITIONAL_SAMPLING_PARAMS] * n,
            hit_rate=hit_rates,
        )
        # `inference_results` executes one scenario per call in the current implementation.
        all_summaries: List[Dict[str, Any]] = []
        total_counter = len(hit_rates)
        for round_counter, hr in enumerate(hit_rates, start=1):
            summary = inference_results(
                mean_input_tokens=[MEAN_INPUT_TOKENS],
                mean_output_tokens=[MEAN_OUTPUT_TOKENS],
                max_num_completed_requests=[MAX_NUM_COMPLETED_REQUESTS],
                concurrent_requests=[CONCURRENT_REQUESTS],
                random_seed=[0],
                hit_rate=[hr],
                TOTAL_COUNTER=total_counter,
                ROUND_COUNTER=round_counter,
            )
            all_summaries.append(summary)
        return self._extract_perf_metrics(all_summaries, hit_rates)

    def _fetch_naive_result(self) -> Optional[Dict[str, Any]]:
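Because `inference_results` runs one scenario per call, the rewrite above drives it in a loop instead of passing vectorized argument lists. A distilled sketch of that pattern (`run_scenario` is a stand-in name for a single `inference_results` call):

```python
from typing import Any, Dict, List

def run_scenario(hit_rate: int, round_counter: int, total: int) -> Dict[str, Any]:
    # Stand-in for one inference_results invocation.
    return {"hit_rate": hit_rate, "round": f"{round_counter}/{total}"}

def run_perf_test(hit_rates: List[int]) -> List[Dict[str, Any]]:
    total = len(hit_rates)
    summaries: List[Dict[str, Any]] = []
    # 1-based round counter so progress reads "round 1/3" rather than "0/3".
    for round_counter, hr in enumerate(hit_rates, start=1):
        summaries.append(run_scenario(hr, round_counter, total))
    return summaries

print(run_perf_test([0, 50, 90]))  # three independent rounds
```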