Skip to content

Commit fd6e5df

Browse files
Authored — Merge branch 'main' into phase1-namespace-auth-change
2 parents 01d0059 + 78118b1 commit fd6e5df

File tree

17 files changed

+141
-471
lines changed

17 files changed

+141
-471
lines changed

tests/conftest.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,13 @@ def ci_s3_bucket_endpoint(pytestconfig: pytest.Config) -> str:
201201

202202

203203
@pytest.fixture(scope="session")
204-
def serving_argument(pytestconfig: pytest.Config, modelcar_yaml_config: dict[str, Any] | None) -> list[str]:
204+
def serving_argument(pytestconfig: pytest.Config, modelcar_yaml_config: dict[str, Any] | None) -> tuple[list[str], int]:
205205
if modelcar_yaml_config:
206-
arg = modelcar_yaml_config.get("serving_argument", [])
207-
return arg if isinstance(arg, list) else [arg]
206+
val = modelcar_yaml_config.get("serving_arguments", {})
207+
if isinstance(val, dict):
208+
args = val.get("args", [])
209+
gpu_count = val.get("gpu_count", 1)
210+
return args, gpu_count
208211

209212
raw_arg = pytestconfig.option.serving_argument
210213
try:

tests/model_explainability/lm_eval/test_lm_eval.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,23 @@
99

1010
LMEVALJOB_COMPLETE_STATE: str = "Complete"
1111

12-
LMEVAL_TASKS: List[str] = get_lmeval_tasks(min_downloads=10000)
12+
TIER1_LMEVAL_TASKS: List[str] = get_lmeval_tasks(min_downloads=10000)
13+
14+
TIER2_LMEVAL_TASKS: List[str] = list(
15+
set(get_lmeval_tasks(min_downloads=0.70, max_downloads=10000)) - set(TIER1_LMEVAL_TASKS)
16+
)
1317

1418

1519
@pytest.mark.parametrize(
1620
"model_namespace, lmevaljob_hf",
1721
[
1822
pytest.param(
19-
{"name": "test-lmeval-hf"},
20-
{"task_list": {"taskNames": LMEVAL_TASKS}},
23+
{"name": "test-lmeval-hf-tier1"},
24+
{"task_list": {"taskNames": TIER1_LMEVAL_TASKS}},
25+
),
26+
pytest.param(
27+
{"name": "test-lmeval-hf-tier2"},
28+
{"task_list": {"taskNames": TIER2_LMEVAL_TASKS}},
2129
),
2230
pytest.param(
2331
{"name": "test-lmeval-hf-custom-task"},

tests/model_explainability/lm_eval/utils.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,30 +36,46 @@ def get_lmevaljob_pod(client: DynamicClient, lmevaljob: LMEvalJob, timeout: int
3636
return lmeval_pod
3737

3838

39-
def get_lmeval_tasks(min_downloads: int = 10000) -> List[str]:
39+
def get_lmeval_tasks(min_downloads: int | float, max_downloads: int | float | None = None) -> List[str]:
4040
"""
4141
Gets the list of supported LM-Eval tasks that have above a certain number of minimum downloads on HuggingFace.
4242
4343
Args:
44-
min_downloads: The minimum number of downloads
44+
min_downloads: The minimum number of downloads or the percentile of downloads to use as a minimum
45+
max_downloads: The maximum number of downloads or the percentile of downloads to use as a maximum
4546
4647
Returns:
4748
List of LM-Eval task names
4849
"""
49-
if min_downloads < 1:
50+
if min_downloads <= 0:
5051
raise ValueError("Minimum downloads must be greater than 0")
5152

5253
lmeval_tasks = pd.read_csv(filepath_or_buffer="tests/model_explainability/lm_eval/data/new_task_list.csv")
5354

54-
# filter for tasks that either exceed (min_downloads OR exist on the OpenLLM leaderboard)
55-
# AND exist on LMEval AND do not include image data
55+
if isinstance(min_downloads, float):
56+
if not 0 <= min_downloads <= 1:
57+
raise ValueError("Minimum downloads as a percentile must be between 0 and 1")
58+
min_downloads = lmeval_tasks["HF dataset downloads"].quantile(q=min_downloads)
5659

60+
# filter for tasks that either exceed min_downloads OR exist on the OpenLLM leaderboard
61+
# AND exist on LMEval AND do not include image data
5762
filtered_df = lmeval_tasks[
5863
lmeval_tasks["Exists"]
5964
& (lmeval_tasks["Dataset"] != "MMMU/MMMU")
6065
& ((lmeval_tasks["HF dataset downloads"] >= min_downloads) | (lmeval_tasks["OpenLLM leaderboard"]))
6166
]
6267

68+
# if max_downloads is provided, filter for tasks that have less than
69+
# or equal to the maximum number of downloads
70+
if max_downloads is not None:
71+
if max_downloads <= 0 or max_downloads > max(lmeval_tasks["HF dataset downloads"]):
72+
raise ValueError("Maximum downloads must be greater than 0 and less than the maximum number of downloads")
73+
if isinstance(max_downloads, float):
74+
if not 0 <= max_downloads <= 1:
75+
raise ValueError("Maximum downloads as a percentile must be between 0 and 1")
76+
max_downloads = lmeval_tasks["HF dataset downloads"].quantile(q=max_downloads)
77+
filtered_df = filtered_df[filtered_df["HF dataset downloads"] <= max_downloads]
78+
6379
# group tasks by dataset and extract the task with shortest name in the group
6480
unique_tasks = filtered_df.loc[filtered_df.groupby("Dataset")["Name"].apply(lambda x: x.str.len().idxmin())]
6581

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarRaw.test_oci_modelcar_raw_openai_inference[modelcar-granite-3-1-8b-base-quantized-w4a161.5-raw].1.json

Lines changed: 0 additions & 50 deletions
This file was deleted.

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarRaw.test_oci_modelcar_raw_openai_inference[modelcar-granite-3-1-8b-base-quantized-w4a161.5-raw].json

Lines changed: 0 additions & 24 deletions
This file was deleted.

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarRaw.test_oci_modelcar_raw_openai_inference[modelcar-llama-3-1-8b-instruct1.5-raw].1.json

Lines changed: 0 additions & 50 deletions
This file was deleted.

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarRaw.test_oci_modelcar_raw_openai_inference[modelcar-llama-3-1-8b-instruct1.5-raw].json

Lines changed: 0 additions & 24 deletions
This file was deleted.

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarRaw.test_oci_modelcar_raw_openai_inference[modelcar-mistral-7b-instruct-v0-3-quantized-w4a161.5-raw].1.json

Lines changed: 0 additions & 50 deletions
This file was deleted.

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarRaw.test_oci_modelcar_raw_openai_inference[modelcar-mistral-7b-instruct-v0-3-quantized-w4a161.5-raw].json

Lines changed: 0 additions & 24 deletions
This file was deleted.

tests/model_serving/model_runtime/model_validation/__snapshots__/test_modelvalidation/TestVLLMModelcarServerless.test_oci_modelcar_serverless_openai_inference[modelcar-granite-3-1-8b-base-quantized-w4a161.5-serverless].1.json

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments (0)